1 /* sgmldecl.c -
2    SGML declaration parsing.
3 
4    Written by James Clark (jjc@jclark.com).
5 */
6 
7 #include "sgmlincl.h"
8 
/* Symbolic names for the error numbers that are generated only by
this module.  The values are passed as the first argument of sderr()
and sdsaverr(). */

#define E_SHUNCHAR        159
#define E_STANDARD        163
#define E_SIGNIFICANT     164
#define E_BADLIT          165
#define E_SCOPE           166
#define E_XNUM            167
#define E_BADVERSION      168
#define E_NMUNSUP         169
#define E_XNMLIT          170
#define E_CHARDESC        171
#define E_CHARDUP         172
#define E_CHARRANGE       173
#define E_7BIT            174
#define E_CHARMISSING     175
#define E_SHUNNED         176
#define E_NONSGML         177
#define E_CAPSET          178
#define E_CAPMISSING      179
#define E_SYNTAX          180
#define E_CHARNUM         181
#define E_SWITCHES        182
#define E_INSTANCE        183
#define E_ZEROFEATURE     184
#define E_YESNO           185
#define E_CAPACITY        186
#define E_NOTSUPPORTED    187
#define E_FORMAL          189
#define E_BADCLASS        190
#define E_MUSTBENON       191
#define E_BADBASECHAR     199
#define E_SYNREFUNUSED    200
#define E_SYNREFUNDESC    201
#define E_SYNREFUNKNOWN   202
#define E_SYNREFUNKNOWNSET 203
#define E_FUNDUP          204
#define E_BADFUN          205
#define E_FUNCHAR         206
#define E_GENDELIM        207
#define E_SRDELIM         208
#define E_BADKEY          209
#define E_BADQUANTITY     210
#define E_BADNAME         211
#define E_REFNAME         212
#define E_DUPNAME         213
#define E_QUANTITY        214
#define E_QTOOBIG         215
#define E_NMSTRTCNT       219
#define E_NMCHARCNT       220
#define E_NMDUP           221
#define E_NMBAD           222
#define E_NMMINUS         223
#define E_UNKNOWNSET      227
#define E_TOTALCAP        235
65 
/* Canonical representative characters. */
#define CANON_NMC '.'		/* Canonical name character. */
#define CANON_NMS 'A'		/* Canonical name start character. */
#define CANON_MIN ':'		/* Canonical minimum data character. */

/* Status values returned by the section parsing functions. */
#define SUCCESS 1
#define FAIL 0

/* Number of elements in the array v (v must be a real array, not a
pointer). */
#define SIZEOF(v) (sizeof(v)/sizeof(v[0]))

/* Non-zero if the name held in token buffer tok is equal to str.  The
comparison starts at tok+1, i.e. the first byte of the buffer is skipped. */
#define matches(tok, str) (ustrcmp((tok)+1, (str)) == 0)
74 
75 static UNCH standard[] = "ISO 8879:1986";
76 
77 #define REFERENCE_SYNTAX "ISO 8879:1986//SYNTAX Reference//EN"
78 #define CORE_SYNTAX "ISO 8879:1986//SYNTAX Core//EN"
79 
80 static UNCH (*newkey)[REFNAMELEN+1] = 0;
81 
82 struct pmap {
83      char *name;
84      UNIV value;
85 };
86 
87 /* The reference capacity set. */
88 #define REFCAPSET \
89 { 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, \
90 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L }
91 
92 long refcapset[NCAPACITY] = REFCAPSET;
93 
94 /* A pmap of known capacity sets. */
95 
96 static struct pmap capset_map[] = {
97      { "ISO 8879:1986//CAPACITY Reference//EN", (UNIV)refcapset },
98      { 0 },
99 };
100 
/* Table of capacity names.  Must match *CAP in sgmldecl.h.  The index of
a name here is the index of the corresponding value in sd.capacity;
entry 0 is the total capacity that the others are checked against. */

char *captab[] = {
     "TOTALCAP",		/* 0 */
     "ENTCAP",			/* 1 */
     "ENTCHCAP",		/* 2 */
     "ELEMCAP",			/* 3 */
     "GRPCAP",			/* 4 */
     "EXGRPCAP",		/* 5 */
     "EXNMCAP",			/* 6 */
     "ATTCAP",			/* 7 */
     "ATTCHCAP",		/* 8 */
     "AVGRPCAP",		/* 9 */
     "NOTCAP",			/* 10 */
     "NOTCHCAP",		/* 11 */
     "IDCAP",			/* 12 */
     "IDREFCAP",		/* 13 */
     "MAPCAP",			/* 14 */
     "LKSETCAP",		/* 15 */
     "LKNMCAP",			/* 16 */
};
122 
/* Used below as the default SUBDOC value of the default SGML declaration
when SUPPORT_SUBDOC is defined. */
#define MAXNUMBER 99999999L

/* Reference quantity set.  (REFLITLEN and REFNAMELEN are defined
elsewhere.) */

#define REFATTCNT	40
#define REFATTSPLEN	960
#define REFBSEQLEN	960
#define REFDTAGLEN	16
#define REFDTEMPLEN	16
#define REFENTLVL	16
#define REFGRPCNT	32
#define REFGRPGTCNT	96
#define REFGRPLVL	16
#define REFNORMSEP	2
#define REFPILEN	240
#define REFTAGLEN	960
#define REFTAGLVL	24

/* Upper bounds for each quantity.  The bounds on counts of structures
are derived from ALLOC_MAX divided by the size of the structure. */

#define ALLOC_MAX 65534

#define BIGINT 30000

#define MAXATTCNT ((ALLOC_MAX/sizeof(struct ad)) - 2)
#define MAXATTSPLEN BIGINT
#define MAXBSEQLEN BIGINT
#define MAXDTAGLEN 16
#define MAXDTEMPLEN 16
#define MAXENTLVL ((ALLOC_MAX/sizeof(struct source)) - 1)
#define MAXGRPCNT MAXGRPGTCNT
/* Must be between 96 and 253 */
#define MAXGRPGTCNT 253
#define MAXGRPLVL MAXGRPGTCNT
#define MAXLITLEN BIGINT
/* This guarantees that NAMELEN < LITLEN (i.e. there's always space for a
name in a buffer intended for a literal.) */
#define MAXNAMELEN (REFLITLEN - 1)
#define MAXNORMSEP 2
#define MAXPILEN BIGINT
#define MAXTAGLEN BIGINT
#define MAXTAGLVL ((ALLOC_MAX/sizeof(struct tag)) - 1)
164 
165 /* Table of quantity names.  Must match Q* in sgmldecl.h. */
166 
167 static char *quantity_names[] = {
168     "ATTCNT",
169     "ATTSPLEN",
170     "BSEQLEN",
171     "DTAGLEN",
172     "DTEMPLEN",
173     "ENTLVL",
174     "GRPCNT",
175     "GRPGTCNT",
176     "GRPLVL",
177     "LITLEN",
178     "NAMELEN",
179     "NORMSEP",
180     "PILEN",
181     "TAGLEN",
182     "TAGLVL",
183 };
184 
185 static int max_quantity[] = {
186     MAXATTCNT,
187     MAXATTSPLEN,
188     MAXBSEQLEN,
189     MAXDTAGLEN,
190     MAXDTEMPLEN,
191     MAXENTLVL,
192     MAXGRPCNT,
193     MAXGRPGTCNT,
194     MAXGRPLVL,
195     MAXLITLEN,
196     MAXNAMELEN,
197     MAXNORMSEP,
198     MAXPILEN,
199     MAXTAGLEN,
200     MAXTAGLVL,
201 };
202 
/* NOTE(review): not referenced in the part of the file visible here;
presumably records which quantities the declaration changed -- confirm
against sdquantity(). */
static char *quantity_changed;

/* Non-zero means the APPINFO parameter was not NONE.  This is the value
returned by sgmldecl(). */
static int appinfosw = 0;
207 
208 struct sgmldecl sd = {
209      REFCAPSET,			/* capacity */
210 #ifdef SUPPORT_SUBDOC
211      MAXNUMBER,			/* subdoc */
212 #else /* not SUPPORT_SUBDOC */
213      0,				/* subdoc */
214 #endif /* not SUPPORT_SUBDOC */
215      1,				/* formal */
216      1,				/* omittag */
217      1,				/* shorttag */
218      1,				/* shortref */
219      { 1, 0 },			/* general/entity name case translation */
220      {				/* reference quantity set */
221 	  REFATTCNT,
222 	  REFATTSPLEN,
223 	  REFBSEQLEN,
224 	  REFDTAGLEN,
225 	  REFDTEMPLEN,
226 	  REFENTLVL,
227 	  REFGRPCNT,
228 	  REFGRPGTCNT,
229 	  REFGRPLVL,
230 	  REFLITLEN,
231 	  REFNAMELEN,
232 	  REFNORMSEP,
233 	  REFPILEN,
234 	  REFTAGLEN,
235 	  REFTAGLVL,
236      },
237 };
238 
/* The system character set described as a base set: an identity table in
which entry i is i, for each of the 256 character numbers. */
static int systemcharset[] = {
/*   0 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
/*  16 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
/*  32 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
/*  48 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
/*  64 */ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
/*  80 */ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
/*  96 */ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
/* 112 */ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
/* 128 */ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
/* 144 */ 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
/* 160 */ 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
/* 176 */ 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
/* 192 */ 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
/* 208 */ 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
/* 224 */ 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
/* 240 */ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
};
257 
258 /* This is a private use designating sequence that by convention
259 refers to the whole system character set whatever it is. */
260 
261 #define SYSTEM_CHARSET_DESIGNATING_SEQUENCE "ESC 2/5 2/15 3/0"
262 
263 static struct pmap charset_map[] = {
264      { "ESC 2/5 4/0", (UNIV)iso646charset }, /* ISO 646 IRV */
265      { "ESC 2/8 4/2", (UNIV)iso646G0charset }, /* ISO Registration Number 6, ASCII */
266      { "ESC 2/8 4/0", (UNIV)iso646G0charset }, /* ISO Registration Number 6, ASCII */
267      { "ESC 2/13 4/1", (UNIV)iso8859_1charset }, /* Latin 1 */
268      { "ESC 2/1 4/0", (UNIV)iso646C0charset }, /* ISO 646, C0 */
269      { "ESC 2/2 4/3", (UNIV)iso6429C1charset }, /* ISO 6429, C1 */
270      { SYSTEM_CHARSET_DESIGNATING_SEQUENCE, (UNIV)systemcharset },
271 				/* system character set */
272      { 0 }
273 };
274 
275 static int synrefcharset[256];	/* the syntax reference character set */
276 
277 #define CHAR_NONSGML 01
278 #define CHAR_SIGNIFICANT 02
279 #define CHAR_MAGIC 04
280 #define CHAR_SHUNNED 010
281 
282 static UNCH char_flags[256];
283 static int done_nonsgml = 0;
284 static UNCH *nlextoke = 0;	/* new lextoke */
285 static UNCH *nlextran = 0;	/* new lextran */
286 #define MAX_SAVED_ERRS 4
287 static UNIV saved_errs[MAX_SAVED_ERRS];
288 static int nsaved_errs = 0;
289 
290 static UNCH kcharset[] = "CHARSET";
291 static UNCH kbaseset[] = "BASESET";
292 static UNCH kdescset[] = "DESCSET";
293 static UNCH kunused[] = "UNUSED";
294 static UNCH kcapacity[] = "CAPACITY";
295 static UNCH kpublic[] = "PUBLIC";
296 static UNCH ksgmlref[] = "SGMLREF";
297 static UNCH kscope[] = "SCOPE";
298 static UNCH kdocument[] = "DOCUMENT";
299 static UNCH kinstance[] = "INSTANCE";
300 static UNCH ksyntax[] = "SYNTAX";
301 static UNCH kswitches[] = "SWITCHES";
302 static UNCH kfeatures[] = "FEATURES";
303 static UNCH kminimize[] = "MINIMIZE";
304 static UNCH kdatatag[] = "DATATAG";
305 static UNCH komittag[] = "OMITTAG";
306 static UNCH krank[] = "RANK";
307 static UNCH kshorttag[] = "SHORTTAG";
308 static UNCH klink[] = "LINK";
309 static UNCH ksimple[] = "SIMPLE";
310 static UNCH kimplicit[] = "IMPLICIT";
311 static UNCH kexplicit[] = "EXPLICIT";
312 static UNCH kother[] = "OTHER";
313 static UNCH kconcur[] = "CONCUR";
314 static UNCH ksubdoc[] = "SUBDOC";
315 static UNCH kformal[] = "FORMAL";
316 static UNCH kyes[] = "YES";
317 static UNCH kno[] = "NO";
318 static UNCH kappinfo[] = "APPINFO";
319 static UNCH knone[] = "NONE";
320 static UNCH kshunchar[] = "SHUNCHAR";
321 static UNCH kcontrols[] = "CONTROLS";
322 static UNCH kfunction[] = "FUNCTION";
323 static UNCH krs[] = "RS";
324 static UNCH kre[] = "RE";
325 static UNCH kspace[] = "SPACE";
326 static UNCH knaming[] = "NAMING";
327 static UNCH klcnmstrt[] = "LCNMSTRT";
328 static UNCH kucnmstrt[] = "UCNMSTRT";
329 static UNCH klcnmchar[] = "LCNMCHAR";
330 static UNCH kucnmchar[] = "UCNMCHAR";
331 static UNCH knamecase[] = "NAMECASE";
332 static UNCH kdelim[] = "DELIM";
333 static UNCH kgeneral[] = "GENERAL";
334 static UNCH kentity[] = "ENTITY";
335 static UNCH kshortref[] = "SHORTREF";
336 static UNCH knames[] = "NAMES";
337 static UNCH kquantity[] = "QUANTITY";
338 
339 #define sderr mderr
340 
341 static UNIV pmaplookup P((struct pmap *, char *));
342 static UNCH *ltous P((long));
343 static VOID sdfixstandard P((UNCH *, int));
344 static int sdparm P((UNCH *, struct parse *));
345 static int sdname P((UNCH *, UNCH *));
346 static int sdckname P((UNCH *, UNCH *));
347 static int sdversion P((UNCH *));
348 static int sdcharset P((UNCH *));
349 static int sdcsdesc P((UNCH *, int *));
350 static int sdpubcapacity P((UNCH *));
351 static int sdcapacity P((UNCH *));
352 static int sdscope P((UNCH *));
353 static VOID setlexical P((void));
354 static VOID noemptytag P((void));
355 static int sdpubsyntax P((UNCH *));
356 static int sdsyntax P((UNCH *));
357 static int sdxsyntax P((UNCH *));
358 static int sdtranscharnum P((UNCH *));
359 static int sdtranschar P((int));
360 static int sdshunchar P((UNCH *));
361 static int sdsynref P((UNCH *));
362 static int sdfunction P((UNCH *));
363 static int sdnaming P((UNCH *));
364 static int sddelim P((UNCH *));
365 static int sdnames P((UNCH *));
366 static int sdquantity P((UNCH *));
367 static int sdfeatures P((UNCH *));
368 static int sdappinfo P((UNCH *));
369 static VOID sdsaverr P((UNS, UNCH *, UNCH *));
370 
371 static VOID bufsalloc P((void));
372 static VOID bufsrealloc P((void));
373 
374 /* Parse the SGML declaration. Return non-zero if there was some appinfo. */
375 
sgmldecl()376 int sgmldecl()
377 {
378      int i;
379      int errsw = 0;
380      UNCH endbuf[REFNAMELEN+2];	/* buffer for parsing terminating > */
381      static int (*section[]) P((UNCH *)) = {
382 	  sdversion,
383 	  sdcharset,
384 	  sdcapacity,
385 	  sdscope,
386 	  sdsyntax,
387 	  sdfeatures,
388 	  sdappinfo,
389      };
390      /* These are needed if we use mderr. */
391      parmno = 0;
392      mdname = sgmlkey;
393      subdcl = NULL;
394      nsaved_errs = 0;
395      for (i = 0; i < SIZEOF(section); i++)
396 	  if ((*section[i])(tbuf) == FAIL) {
397 	       errsw = 1;
398 	       break;
399 	  }
400      if (sd.formal) {
401 	  /* print saved errors */
402 	  int i;
403 	  for (i = 0; i < nsaved_errs; i++)
404 	       svderr(saved_errs[i]);
405      }
406      else {
407 	  /* free saved errors */
408 	  int i;
409 	  for (i = 0; i < nsaved_errs; i++)
410 	       msgsfree(saved_errs[i]);
411      }
412 
413      if (!errsw)
414 	  setlexical();
415      bufsrealloc();
416      /* Parse the >.  Don't overwrite the appinfo. */
417      if (!errsw)
418 	  sdparm(endbuf, 0);
419      /* We must exit if we hit end of document. */
420      if (pcbsd.action == EOD_)
421 	  exiterr(161, &pcbsd);
422      if (!errsw && pcbsd.action != ESGD)
423 	  sderr(126, (UNCH *)0, (UNCH *)0);
424      return appinfosw;
425 }
426 
427 /* Parse the literal (which should contain the version of the
428 standard) at the beginning of a SGML declaration. */
429 
sdversion(tbuf)430 static int sdversion(tbuf)
431 UNCH *tbuf;
432 {
433      if (sdparm(tbuf, &pcblitv) != LIT1) {
434 	  sderr(123, (UNCH *)0, (UNCH *)0);
435 	  return FAIL;
436      }
437      sdfixstandard(tbuf, 0);
438      if (ustrcmp(tbuf, standard) != 0)
439 	  sderr(E_BADVERSION, tbuf, standard);
440      return SUCCESS;
441 }
442 
443 /* Parse the CHARSET section. Use one token lookahead. */
444 
sdcharset(tbuf)445 static int sdcharset(tbuf)
446 UNCH *tbuf;
447 {
448      int i;
449      int status[256];
450 
451      if (sdname(tbuf, kcharset) == FAIL) return FAIL;
452      (void)sdparm(tbuf, 0);
453 
454      if (sdcsdesc(tbuf, status) == FAIL)
455 	  return FAIL;
456 
457 #if 0
458      for (i = 128; i < 256; i++)
459 	  if (status[i] != UNDESC)
460 	       break;
461      if (i >= 256) {
462 	  /* Only a 7-bit character set was described.  Fill it out to 8-bits. */
463 	  for (i = 128; i < 256; i++)
464 	       status[i] = UNUSED;
465 #if 0
466 	  sderr(E_7BIT, (UNCH *)0, (UNCH *)0);
467 #endif
468      }
469 #endif
470      /* Characters that are declared UNUSED in the document character set
471 	are assigned to non-SGML. */
472      for (i = 0; i < 256; i++) {
473 	  if (status[i] == UNDESC) {
474 #if 0
475 	       sderr(E_CHARMISSING, ltous((long)i), (UNCH *)0);
476 #endif
477 	       char_flags[i] |= CHAR_NONSGML;
478 	  }
479 	  else if (status[i] == UNUSED)
480 	       char_flags[i] |= CHAR_NONSGML;
481      }
482      done_nonsgml = 1;
483      return SUCCESS;
484 }
485 
486 /* Parse a character set description.   Uses one character lookahead. */
487 
sdcsdesc(tbuf,status)488 static int sdcsdesc(tbuf, status)
489 UNCH *tbuf;
490 int *status;
491 {
492      int i;
493      int nsets = 0;
494      struct fpi fpi;
495 
496      for (i = 0; i < 256; i++)
497 	  status[i] = UNDESC;
498 
499      for (;;) {
500 	  int nchars;
501 	  int *baseset = 0;
502 
503 	  if (pcbsd.action != NAS1) {
504 	       if (nsets == 0) {
505 		    sderr(120, (UNCH *)0, (UNCH *)0);
506 		    return FAIL;
507 	       }
508 	       break;
509 	  }
510 	  if (!matches(tbuf, kbaseset)) {
511 	       if (nsets == 0) {
512 		    sderr(118, tbuf+1, kbaseset);
513 		    return FAIL;
514 	       }
515 	       break;
516 	  }
517 	  nsets++;
518 	  MEMZERO((UNIV)&fpi, FPISZ);
519 	  if (sdparm(tbuf, &pcblitv) != LIT1) {
520 	       sderr(123, (UNCH *)0, (UNCH *)0);
521 	       return FAIL;
522 	  }
523 	  fpi.fpipubis = tbuf;
524 	  /* Give a warning if it is not a CHARSET fpi. */
525 	  if (parsefpi(&fpi))
526 	       sdsaverr(E_FORMAL, (UNCH *)0, (UNCH *)0);
527 	  else if (fpi.fpic != FPICHARS)
528 	       sdsaverr(E_BADCLASS, kcharset, (UNCH *)0);
529 	  else {
530 	       fpi.fpipubis[fpi.fpil + fpi.fpill] = '\0';
531 	       baseset = (int *)pmaplookup(charset_map,
532 					   (char *)fpi.fpipubis + fpi.fpil);
533 	       if (!baseset)
534 		    sderr(E_UNKNOWNSET, fpi.fpipubis + fpi.fpil, (UNCH *)0);
535 	  }
536 	  if (sdname(tbuf, kdescset) == FAIL) return FAIL;
537 	  nchars = 0;
538 	  for (;;) {
539 	       long start, count;
540 	       long basenum;
541 	       if (sdparm(tbuf, 0) != NUM1)
542 		    break;
543 	       start = atol((char *)tbuf);
544 	       if (sdparm(tbuf, 0) != NUM1) {
545 		    sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
546 		    return FAIL;
547 	       }
548 	       count = atol((char *)tbuf);
549 	       switch (sdparm(tbuf, &pcblitv)) {
550 	       case NUM1:
551 		    basenum = atol((char *)tbuf);
552 		    break;
553 	       case LIT1:
554 		    basenum = UNKNOWN;
555 		    break;
556 	       case NAS1:
557 		    if (matches(tbuf, kunused)) {
558 			 basenum = UNUSED;
559 			 break;
560 		    }
561 		    /* fall through */
562 	       default:
563 		    sderr(E_CHARDESC, ltous(start), (UNCH *)0);
564 		    return FAIL;
565 	       }
566 	       if (start + count > 256)
567 		    sderr(E_CHARRANGE, (UNCH *)0, (UNCH *)0);
568 	       else {
569 		    int i;
570 		    int lim = (int)start + count;
571 		    for (i = (int)start; i < lim; i++) {
572 			 if (status[i] != UNDESC)
573 			      sderr(E_CHARDUP, ltous((long)i), (UNCH *)0);
574 			 else if (basenum == UNUSED || basenum == UNKNOWN)
575 			      status[i] = (int)basenum;
576 			 else if (baseset == 0)
577 			      status[i] = UNKNOWN_SET;
578 			 else {
579 			      int n = basenum + (i - start);
580 			      if (n < 0 || n > 255)
581 				   sderr(E_CHARRANGE, (UNCH *)0, (UNCH *)0);
582 			      else {
583 				   if (baseset[n] == UNUSED)
584 					sderr(E_BADBASECHAR, ltous((long)n),
585 					      (UNCH *)0);
586 				   status[i] = baseset[n];
587 			      }
588 			 }
589 		    }
590 	       }
591 	       nchars++;
592 	  }
593 	  if (nchars == 0) {
594 	       sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
595 	       return FAIL;
596 	  }
597      }
598      return SUCCESS;
599 }
600 
601 /* Parse the CAPACITY section.  Uses one token lookahead. */
602 
sdcapacity(tbuf)603 static int sdcapacity(tbuf)
604 UNCH *tbuf;
605 {
606      int ncap;
607      int i;
608 
609      if (sdckname(tbuf, kcapacity) == FAIL)
610 	  return FAIL;
611      if (sdparm(tbuf, 0) != NAS1) {
612 	  sderr(120, (UNCH *)0, (UNCH *)0);
613 	  return FAIL;
614      }
615      if (matches(tbuf, kpublic))
616 	  return sdpubcapacity(tbuf);
617      if (!matches(tbuf, ksgmlref)) {
618 	  sderr(E_CAPACITY, tbuf+1, (UNCH *)0);
619 	  return FAIL;
620      }
621      memcpy((UNIV)sd.capacity, (UNIV)refcapset, sizeof(sd.capacity));
622      ncap = 0;
623      for (;;) {
624 	  int capno = -1;
625 	  int i;
626 
627 	  if (sdparm(tbuf, 0) != NAS1)
628 	       break;
629 	  for (i = 0; i < SIZEOF(captab); i++)
630 	       if (matches(tbuf, captab[i])) {
631 		    capno = i;
632 		    break;
633 	       }
634 	  if (capno < 0)
635 	       break;
636 	  if (sdparm(tbuf, 0) != NUM1) {
637 	       sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
638 	       return FAIL;
639 	  }
640 	  sd.capacity[capno] = atol((char *)tbuf);
641 	  ncap++;
642      }
643      if (ncap == 0) {
644 	  sderr(E_CAPMISSING, (UNCH *)0, (UNCH *)0);
645 	  return FAIL;
646      }
647      for (i = 1; i < NCAPACITY; i++)
648 	  if (sd.capacity[i] > sd.capacity[0])
649 	       sderr(E_TOTALCAP, (UNCH *)captab[i], (UNCH *)0);
650      return SUCCESS;
651 }
652 
653 /* Parse a CAPACITY section that started with PUBLIC.  Must do one
654 token lookahead, since sdcapacity() also does. */
655 
sdpubcapacity(tbuf)656 static int sdpubcapacity(tbuf)
657 UNCH *tbuf;
658 {
659      UNIV ptr;
660      if (sdparm(tbuf, &pcblitv) != LIT1) {
661 	  sderr(123, (UNCH *)0, (UNCH *)0);
662 	  return FAIL;
663      }
664      sdfixstandard(tbuf, 1);
665      ptr = pmaplookup(capset_map, (char *)tbuf);
666      if (!ptr)
667 	  sderr(E_CAPSET, tbuf, (UNCH *)0);
668      else
669 	  memcpy((UNIV)sd.capacity, (UNIV)ptr, sizeof(sd.capacity));
670      (void)sdparm(tbuf, 0);
671      return SUCCESS;
672 }
673 
674 /* Parse the SCOPE section. Uses no lookahead. */
675 
sdscope(tbuf)676 static int sdscope(tbuf)
677 UNCH *tbuf;
678 {
679      if (sdckname(tbuf, kscope) == FAIL)
680 	  return FAIL;
681      if (sdparm(tbuf, 0) != NAS1) {
682 	  sderr(120, (UNCH *)0, (UNCH *)0);
683 	  return FAIL;
684      }
685      if (matches(tbuf, kdocument))
686 	  ;
687      else if (matches(tbuf, kinstance))
688 	  sderr(E_INSTANCE, (UNCH *)0, (UNCH *)0);
689      else {
690 	  sderr(E_SCOPE, tbuf+1, (UNCH *)0);
691 	  return FAIL;
692      }
693      return SUCCESS;
694 }
695 
696 /* Parse the SYNTAX section.  Uses one token lookahead. */
697 
sdsyntax(tbuf)698 static int sdsyntax(tbuf)
699 UNCH *tbuf;
700 {
701      if (sdname(tbuf, ksyntax) == FAIL) return FAIL;
702      if (sdparm(tbuf, 0) != NAS1) {
703 	  sderr(120, (UNCH *)0, (UNCH *)0);
704 	  return FAIL;
705      }
706      if (matches(tbuf, kpublic))
707 	  return sdpubsyntax(tbuf);
708      return sdxsyntax(tbuf);
709 }
710 
711 /* Parse the SYNTAX section which starts with PUBLIC.  Uses one token
712 lookahead. */
713 
sdpubsyntax(tbuf)714 static int sdpubsyntax(tbuf)
715 UNCH *tbuf;
716 {
717      int nswitches;
718      if (sdparm(tbuf, &pcblitv) != LIT1)
719 	  return FAIL;
720      sdfixstandard(tbuf, 1);
721      if (ustrcmp(tbuf, CORE_SYNTAX) == 0)
722 	  sd.shortref = 0;
723      else if (ustrcmp(tbuf, REFERENCE_SYNTAX) == 0)
724 	  sd.shortref = 1;
725      else
726 	  sderr(E_SYNTAX, tbuf, (UNCH *)0);
727      if (sdparm(tbuf, 0) != NAS1)
728 	  return SUCCESS;
729      if (!matches(tbuf, kswitches))
730 	  return SUCCESS;
731      nswitches = 0;
732      for (;;) {
733 	  int errsw = 0;
734 
735 	  if (sdparm(tbuf, 0) != NUM1)
736 	       break;
737 	  if (atol((char *)tbuf) > 255) {
738 	       sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
739 	       errsw = 1;
740 	  }
741 	  if (sdparm(tbuf, 0) != NUM1) {
742 	       sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
743 	       return FAIL;
744 	  }
745 	  if (!errsw) {
746 	       if (atol((char *)tbuf) > 255)
747 		    sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
748 	  }
749 	  nswitches++;
750      }
751      if (nswitches == 0) {
752 	  sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
753 	  return FAIL;
754      }
755      sderr(E_SWITCHES, (UNCH *)0, (UNCH *)0);
756      return SUCCESS;
757 }
758 
759 /* Parse an explicit concrete syntax. Uses one token lookahead. */
760 
761 static
sdxsyntax(tbuf)762 int sdxsyntax(tbuf)
763 UNCH *tbuf;
764 {
765      static int (*section[]) P((UNCH *)) = {
766 	  sdshunchar,
767 	  sdsynref,
768 	  sdfunction,
769 	  sdnaming,
770 	  sddelim,
771 	  sdnames,
772 	  sdquantity,
773      };
774      int i;
775 
776      for (i = 0; i < SIZEOF(section); i++)
777 	  if ((*section[i])(tbuf) == FAIL)
778 	       return FAIL;
779      return SUCCESS;
780 }
781 
782 /* Parse the SHUNCHAR section. Uses one token lookahead. */
783 
784 static
sdshunchar(tbuf)785 int sdshunchar(tbuf)
786 UNCH *tbuf;
787 {
788      int i;
789      for (i = 0; i < 256; i++)
790 	  char_flags[i] &= ~CHAR_SHUNNED;
791 
792      if (sdckname(tbuf, kshunchar) == FAIL)
793 	  return FAIL;
794 
795      if (sdparm(tbuf, 0) == NAS1) {
796 	  if (matches(tbuf, knone)) {
797 	       (void)sdparm(tbuf, 0);
798 	       return SUCCESS;
799 	  }
800 	  if (matches(tbuf, kcontrols)) {
801 	       for (i = 0; i < 256; i++)
802 		    if (ISASCII(i) && iscntrl(i))
803 			 char_flags[i] |= CHAR_SHUNNED;
804 	       if (sdparm(tbuf, 0) != NUM1)
805 		    return SUCCESS;
806 	  }
807      }
808      if (pcbsd.action != NUM1) {
809 	  sderr(E_SHUNCHAR, (UNCH *)0, (UNCH *)0);
810 	  return FAIL;
811      }
812      do {
813 	  long n = atol((char *)tbuf);
814 	  if (n > 255)
815 	       sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
816 	  else
817 	       char_flags[(int)n] |= CHAR_SHUNNED;
818      } while (sdparm(tbuf, 0) == NUM1);
819      return SUCCESS;
820 }
821 
822 /* Parse the syntax reference character set. Uses one token lookahead. */
823 
824 static
sdsynref(tbuf)825 int sdsynref(tbuf)
826 UNCH *tbuf;
827 {
828      return sdcsdesc(tbuf, synrefcharset);
829 }
830 
831 /* Translate a character number from the syntax reference character set
832 to the system character set. If it can't be done, give an error message
833 and return -1. */
834 
835 static
sdtranscharnum(tbuf)836 int sdtranscharnum(tbuf)
837 UNCH *tbuf;
838 {
839      long n = atol((char *)tbuf);
840      if (n > 255) {
841 	  sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
842 	  return -1;
843      }
844      return sdtranschar((int)n);
845 }
846 
847 
848 static
sdtranschar(n)849 int sdtranschar(n)
850 int n;
851 {
852      int ch = synrefcharset[n];
853      if (ch >= 0)
854 	  return ch;
855      switch (ch) {
856      case UNUSED:
857 	  sderr(E_SYNREFUNUSED, ltous((long)n), (UNCH *)0);
858 	  break;
859      case UNDESC:
860 	  sderr(E_SYNREFUNDESC, ltous((long)n), (UNCH *)0);
861 	  break;
862      case UNKNOWN:
863 	  sderr(E_SYNREFUNKNOWN, ltous((long)n), (UNCH *)0);
864 	  break;
865      case UNKNOWN_SET:
866 	  sderr(E_SYNREFUNKNOWNSET, ltous((long)n), (UNCH *)0);
867 	  break;
868      default:
869 	  abort();
870      }
871      return -1;
872 }
873 
874 
875 /* Parse the function section. Uses two tokens lookahead. "NAMING"
876 could be a function name. */
877 
878 static
sdfunction(tbuf)879 int sdfunction(tbuf)
880 UNCH *tbuf;
881 {
882      static UNCH *fun[] = { kre, krs, kspace };
883      static int funval[] = { RECHAR, RSCHAR, ' ' };
884      int i;
885      int had_tab = 0;
886      int changed = 0;		/* attempted to change reference syntax */
887 
888      if (sdckname(tbuf, kfunction) == FAIL)
889 	  return FAIL;
890      for (i = 0; i < SIZEOF(fun); i++) {
891 	  int ch;
892 	  if (sdname(tbuf, fun[i]) == FAIL)
893 	       return FAIL;
894 	  if (sdparm(tbuf, 0) != NUM1) {
895 	       sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
896 	       return FAIL;
897 	  }
898 	  ch = sdtranscharnum(tbuf);
899 	  if (ch >= 0 && ch != funval[i])
900 	       changed = 1;
901      }
902      for (;;) {
903 	  int tabsw = 0;
904 	  int namingsw = 0;
905 	  if (sdparm(tbuf, 0) != NAS1) {
906 	       sderr(120, (UNCH *)0, (UNCH *)0);
907 	       return FAIL;
908 	  }
909 	  if (matches(tbuf, (UNCH *)"TAB")) {
910 	       tabsw = 1;
911 	       if (had_tab)
912 		    sderr(E_FUNDUP, (UNCH *)0, (UNCH *)0);
913 	  }
914 	  else {
915 	       for (i = 0; i < SIZEOF(fun); i++)
916 		    if (matches(tbuf, fun[i]))
917 			 sderr(E_BADFUN, fun[i], (UNCH *)0);
918 	       if (matches(tbuf, knaming))
919 		    namingsw = 1;
920 	       else
921 		    changed = 1;
922 	  }
923 	  if (sdparm(tbuf, 0) != NAS1) {
924 	       sderr(120, (UNCH *)0, (UNCH *)0);
925 	       return FAIL;
926 	  }
927 	  if (namingsw) {
928 	       if (matches(tbuf, klcnmstrt))
929 		    break;
930 	       changed = 1;
931 	  }
932 	  if (sdparm(tbuf, 0) != NUM1) {
933 	       sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
934 	       return FAIL;
935 	  }
936 	  if (tabsw && !had_tab) {
937 	       int ch = sdtranscharnum(tbuf);
938 	       if (ch >= 0 && ch != TABCHAR)
939 		    changed = 1;
940 	       had_tab = 1;
941 	  }
942 
943      }
944      if (!had_tab)
945 	  changed = 1;
946      if (changed)
947 	  sderr(E_FUNCHAR, (UNCH *)0, (UNCH *)0);
948      return SUCCESS;
949 }
950 
951 /* Parse the NAMING section.  Uses no lookahead. */
952 
953 static
sdnaming(tbuf)954 int sdnaming(tbuf)
955 UNCH *tbuf;
956 {
957      int i;
958      int bad = 0;
959      static UNCH *classes[] = { klcnmstrt, kucnmstrt, klcnmchar, kucnmchar };
960      static UNCH *types[] = { kgeneral, kentity };
961 
962 #define NCLASSES SIZEOF(classes)
963 
964      int bufsize = 4;		/* allocated size of buf */
965      UNCH *buf = (UNCH *)rmalloc(bufsize); /* holds characters
966 					      in naming classes */
967      int bufi = 0;		/* next index into buf */
968      int start[NCLASSES];	/* index of first character for each class */
969      int count[NCLASSES];	/* number of characters for each class */
970 
971      for (i = 0; i < NCLASSES; i++) {
972 	  UNCH *s;
973 
974 	  if (sdckname(tbuf, classes[i]) == FAIL) {
975 	       frem((UNIV)buf);
976 	       return FAIL;
977 	  }
978 	  if (sdparm(tbuf, &pcblitp) != LIT1) {
979 	       sderr(123, (UNCH *)0, (UNCH *)0);
980 	       frem((UNIV)buf);
981 	       return FAIL;
982 	  }
983 	  start[i] = bufi;
984 
985 	  for (s = tbuf; *s; s++) {
986 	       int c = *s;
987 	       if (c == DELNONCH) {
988 		    c = UNSHIFTNON(*s);
989 		    s++;
990 	       }
991 	       c = sdtranschar(c);
992 	       if (c < 0)
993 		    bad = 1;
994 	       else if ((char_flags[c] & (CHAR_SIGNIFICANT | CHAR_MAGIC))
995 			&& c != '.' && c != '-') {
996 		    int class = lextoke[c];
997 		    if (class == SEP || class == SP || class == NMC
998 			|| class == NMS || class == NU)
999 			 sderr(E_NMBAD, ltous((long)c), (UNCH *)0);
1000 		    else
1001 			 sderr(E_NMUNSUP, ltous((long)c), (UNCH *)0);
1002 		    bad = 1;
1003 	       }
1004 	       if (bufi >= bufsize)
1005 		    buf = (UNCH *)rrealloc((UNIV)buf, bufsize *= 2);
1006 	       buf[bufi++] = c;
1007 	  }
1008 
1009 	  count[i] = bufi - start[i];
1010 	  (void)sdparm(tbuf, 0);
1011      }
1012      if (!bad && count[0] != count[1]) {
1013 	  sderr(E_NMSTRTCNT, (UNCH *)0, (UNCH *)0);
1014 	  bad = 1;
1015      }
1016      if (!bad && count[2] != count[3]) {
1017 	  sderr(E_NMCHARCNT, (UNCH *)0, (UNCH *)0);
1018 	  bad = 1;
1019      }
1020      if (!bad) {
1021 	  nlextoke = (UNCH *)rmalloc(256);
1022 	  memcpy((UNIV)nlextoke, lextoke, 256);
1023 	  nlextoke['.'] = nlextoke['-'] = INV;
1024 
1025 	  nlextran = (UNCH *)rmalloc(256);
1026 	  memcpy((UNIV)nlextran, lextran, 256);
1027 
1028 	  for (i = 0; i < count[0]; i++) {
1029 	       UNCH lc = buf[start[0] + i];
1030 	       UNCH uc = buf[start[1] + i];
1031 	       nlextoke[lc] = NMS;
1032 	       nlextoke[uc] = NMS;
1033 	       nlextran[lc] = uc;
1034 	  }
1035 
1036 	  for (i = 0; i < count[2]; i++) {
1037 	       UNCH lc = buf[start[2] + i];
1038 	       UNCH uc = buf[start[3] + i];
1039 	       if (nlextoke[lc] == NMS) {
1040 		    sderr(E_NMDUP, ltous((long)lc), (UNCH *)0);
1041 		    bad = 1;
1042 	       }
1043 	       else if (nlextoke[uc] == NMS) {
1044 		    sderr(E_NMDUP, ltous((long)uc), (UNCH *)0);
1045 		    bad = 1;
1046 	       }
1047 	       else {
1048 		    nlextoke[lc] = NMC;
1049 		    nlextoke[uc] = NMC;
1050 		    nlextran[lc] = uc;
1051 	       }
1052 	  }
1053 	  if (nlextoke['-'] != NMC) {
1054 	       sderr(E_NMMINUS, (UNCH *)0, (UNCH *)0);
1055 	       bad = 1;
1056 	  }
1057 	  if (bad) {
1058 	       if (nlextoke) {
1059 		    frem((UNIV)nlextoke);
1060 		    nlextoke = 0;
1061 	       }
1062 	       if (nlextran) {
1063 		    frem((UNIV)nlextran);
1064 		    nlextran = 0;
1065 	       }
1066 	  }
1067      }
1068 
1069      frem((UNIV)buf);
1070 
1071      if (sdckname(tbuf, knamecase) == FAIL)
1072 	  return FAIL;
1073      for (i = 0; i < SIZEOF(types); ++i) {
1074 	  if (sdname(tbuf, types[i]) == FAIL)
1075 	       return FAIL;
1076 	  if (sdparm(tbuf, 0) != NAS1) {
1077 	       sderr(120, (UNCH *)0, (UNCH *)0);
1078 	       return FAIL;
1079 	  }
1080 	  if (matches(tbuf, kyes))
1081 	       sd.namecase[i] = 1;
1082 	  else if (matches(tbuf, kno))
1083 	       sd.namecase[i] = 0;
1084 	  else {
1085 	       sderr(E_YESNO, tbuf+1, (UNCH *)0);
1086 	       return FAIL;
1087 	  }
1088      }
1089      return SUCCESS;
1090 }
1091 
1092 /* Parse the DELIM section. Uses one token lookahead. */
1093 
1094 static
sddelim(tbuf)1095 int sddelim(tbuf)
1096 UNCH *tbuf;
1097 {
1098      int changed = 0;
1099      if (sdname(tbuf, kdelim) == FAIL
1100 	 || sdname(tbuf, kgeneral) == FAIL
1101 	 || sdname(tbuf, ksgmlref) == FAIL)
1102 	  return FAIL;
1103      for (;;) {
1104 	  if (sdparm(tbuf, 0) != NAS1) {
1105 	       sderr(120, (UNCH *)0, (UNCH *)0);
1106 	       return FAIL;
1107 	  }
1108 	  if (matches(tbuf, kshortref))
1109 	       break;
1110 	  if (sdparm(tbuf, &pcblitp) != LIT1) {
1111 	       sderr(123, (UNCH *)0, (UNCH *)0);
1112 	       return FAIL;
1113 	  }
1114 	  changed = 1;
1115      }
1116      if (changed) {
1117 	  sderr(E_GENDELIM, (UNCH *)0,(UNCH *)0);
1118 	  changed = 0;
1119      }
1120      if (sdparm(tbuf, 0) != NAS1) {
1121 	  sderr(120, (UNCH *)0, (UNCH *)0);
1122 	  return FAIL;
1123      }
1124      if (matches(tbuf, ksgmlref))
1125 	  sd.shortref = 1;
1126      else if (matches(tbuf, knone))
1127 	  sd.shortref = 0;
1128      else {
1129 	  sderr(118, tbuf+1, ksgmlref);	/* probably they forgot SGMLREF */
1130 	  return FAIL;
1131      }
1132      while (sdparm(tbuf, &pcblitp) == LIT1)
1133 	  changed = 1;
1134      if (changed)
1135 	  sderr(E_SRDELIM, (UNCH *)0, (UNCH *)0);
1136      return SUCCESS;
1137 }
1138 
1139 /* Parse the NAMES section. Uses one token lookahead. */
1140 
1141 static
sdnames(tbuf)1142 int sdnames(tbuf)
1143 UNCH *tbuf;
1144 {
1145      int i;
1146      if (sdckname(tbuf, knames) == FAIL)
1147 	  return FAIL;
1148      if (sdname(tbuf, ksgmlref) == FAIL)
1149 	  return FAIL;
1150 
1151      while (sdparm(tbuf, 0) == NAS1) {
1152 	  int j;
1153 	  if (matches(tbuf, kquantity))
1154 	       break;
1155 	  for (i = 0; i < NKEYS; i++)
1156 	       if (matches(tbuf, key[i]))
1157 		    break;
1158 	  if (i >= NKEYS) {
1159 	       sderr(E_BADKEY, tbuf+1, (UNCH *)0);
1160 	       return FAIL;
1161 	  }
1162 	  if (sdparm(tbuf, &pcblitp) != NAS1) {
1163 	       sderr(120, (UNCH *)0, (UNCH *)0);
1164 	       return FAIL;
1165 	  }
1166 	  if (!newkey) {
1167 	       newkey = (UNCH (*)[REFNAMELEN+1])rmalloc((REFNAMELEN+1)*NKEYS);
1168 	       MEMZERO((UNIV)newkey, (REFNAMELEN+1)*NKEYS);
1169 	  }
1170 	  for (j = 0; j < NKEYS; j++) {
1171 	       if (matches(tbuf, key[j])) {
1172 		    sderr(E_REFNAME, tbuf + 1, (UNCH *)0);
1173 		    break;
1174 	       }
1175 	       if (matches(tbuf, newkey[j])) {
1176 		    sderr(E_DUPNAME, tbuf + 1, (UNCH *)0);
1177 		    break;
1178 	       }
1179 	  }
1180 	  if (j >= NKEYS)
1181 	       ustrcpy(newkey[i], tbuf + 1);
1182      }
1183      /* Now install the new keys. */
1184      if (newkey) {
1185 	  for (i = 0; i < NKEYS; i++)
1186 	       if (newkey[i][0] != '\0') {
1187 		    UNCH temp[REFNAMELEN + 1];
1188 
1189 		    ustrcpy(temp, key[i]);
1190 		    ustrcpy(key[i], newkey[i]);
1191 		    ustrcpy(newkey[i], temp);
1192 	       }
1193      }
1194      return SUCCESS;
1195 }
1196 
1197 /* Parse the QUANTITY section. Uses one token lookahead. */
1198 
sdquantity(tbuf)1199 static int sdquantity(tbuf)
1200 UNCH *tbuf;
1201 {
1202      int quantity[NQUANTITY];
1203      int i;
1204 
1205      for (i = 0; i < NQUANTITY; i++)
1206 	  quantity[i] = -1;
1207      if (sdckname(tbuf, kquantity) == FAIL)
1208 	  return FAIL;
1209      if (sdname(tbuf, ksgmlref) == FAIL)
1210 	  return FAIL;
1211      while (sdparm(tbuf, 0) == NAS1 && !matches(tbuf, kfeatures)) {
1212 	  long n;
1213 	  for (i = 0; i < SIZEOF(quantity_names); i++)
1214 	       if (matches(tbuf, quantity_names[i]))
1215 		    break;
1216 	  if (i >= SIZEOF(quantity_names)) {
1217 	       sderr(E_BADQUANTITY, tbuf + 1, (UNCH *)0);
1218 	       return FAIL;
1219 	  }
1220 	  if (sdparm(tbuf, 0) != NUM1) {
1221 	       sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
1222 	       return FAIL;
1223 	  }
1224 	  n = atol((char *)tbuf);
1225 	  if (n < sd.quantity[i])
1226 	       sderr(E_QUANTITY, (UNCH *)quantity_names[i],
1227 		     ltous((long)sd.quantity[i]));
1228 	  else if (n > max_quantity[i]) {
1229 	       sderr(E_QTOOBIG, (UNCH *)quantity_names[i],
1230 		     ltous((long)max_quantity[i]));
1231 	       quantity[i] = max_quantity[i];
1232 	  }
1233 	  else
1234 	       quantity[i] = (int)n;
1235      }
1236      for (i = 0; i < NQUANTITY; i++)
1237 	  if (quantity[i] > 0) {
1238 	       sd.quantity[i] = quantity[i];
1239 	       if (!quantity_changed)
1240 		    quantity_changed = (char *)rmalloc(NQUANTITY);
1241 	       quantity_changed[i] = 1;
1242 	  }
1243      return SUCCESS;
1244 }
1245 
1246 /* Parse the FEATURES section.  Uses no lookahead. */
1247 
sdfeatures(tbuf)1248 static int sdfeatures(tbuf)
1249 UNCH *tbuf;
1250 {
1251      static struct  {
1252 	  UNCH *name;
1253 	  UNCH argtype;  /* 0 = no argument, 1 = boolean, 2 = numeric */
1254 	  UNIV valp;     /* UNCH * if boolean, long * if numeric. */
1255      } features[] = {
1256 	  { kminimize, 0, 0 },
1257 	  { kdatatag, 1, 0 },
1258 	  { komittag, 1, (UNIV)&sd.omittag },
1259 	  { krank, 1, 0 },
1260 	  { kshorttag, 1, (UNIV)&sd.shorttag },
1261 	  { klink, 0, 0 },
1262 	  { ksimple, 2, 0 },
1263 	  { kimplicit, 1, 0 },
1264 	  { kexplicit, 2, 0 },
1265 	  { kother, 0, 0 },
1266 	  { kconcur, 2, 0 },
1267 	  { ksubdoc, 2, (UNIV)&sd.subdoc },
1268 	  { kformal, 1, (UNIV)&sd.formal },
1269      };
1270 
1271      int i;
1272 
1273      if (sdckname(tbuf, kfeatures) == FAIL)
1274 	  return FAIL;
1275      for (i = 0; i < SIZEOF(features); i++) {
1276 	  if (sdname(tbuf, features[i].name) == FAIL) return FAIL;
1277 	  if (features[i].argtype > 0) {
1278 	       long n;
1279 	       if (sdparm(tbuf, 0) != NAS1) {
1280 		    sderr(120, (UNCH *)0, (UNCH *)0);
1281 		    return FAIL;
1282 	       }
1283 	       if (matches(tbuf, kyes)) {
1284 		    if (features[i].argtype > 1) {
1285 			 if (sdparm(tbuf, 0) != NUM1) {
1286 			      sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
1287 			      return FAIL;
1288 			 }
1289 			 n = atol((char *)tbuf);
1290 			 if (n == 0)
1291 			      sderr(E_ZEROFEATURE, features[i].name, (UNCH *)0);
1292 		    }
1293 		    else
1294 			 n = 1;
1295 	       }
1296 	       else if (matches(tbuf, kno))
1297 		    n = 0;
1298 	       else {
1299 		    sderr(E_YESNO, tbuf+1, (UNCH *)0);
1300 		    return FAIL;
1301 	       }
1302 	       if (features[i].valp == 0) {
1303 		    if (n > 0)
1304 			 sderr(E_NOTSUPPORTED, features[i].name,
1305 			      (UNCH *)0);
1306 	       }
1307 	       else if (features[i].argtype > 1)
1308 		    *(long *)features[i].valp = n;
1309 	       else
1310 		    *(UNCH *)features[i].valp = (UNCH)n;
1311 	  }
1312      }
1313      if (!sd.shorttag)
1314 	  noemptytag();
1315      return SUCCESS;
1316 }
1317 
1318 /* Parse the APPINFO section.  Uses no lookahead. */
1319 
sdappinfo(tbuf)1320 static int sdappinfo(tbuf)
1321 UNCH *tbuf;
1322 {
1323      if (sdname(tbuf, kappinfo) == FAIL) return FAIL;
1324      switch (sdparm(tbuf, &pcblitv)) {
1325      case LIT1:
1326 	  appinfosw = 1;
1327 	  break;
1328      case NAS1:
1329 	  if (matches(tbuf, knone))
1330 	       break;
1331 	  sderr(118, tbuf+1, knone);
1332 	  return FAIL;
1333      default:
1334 	  sderr(E_XNMLIT, knone, (UNCH *)0);
1335 	  return FAIL;
1336      }
1337      return SUCCESS;
1338 }
1339 
1340 /* Change a prefix of ISO 8879-1986 to ISO 8879:1986.  Amendment 1 to
1341 the standard requires the latter. */
1342 
sdfixstandard(tbuf,silently)1343 static VOID sdfixstandard(tbuf, silently)
1344 UNCH *tbuf;
1345 int silently;
1346 {
1347      if (strncmp((char *)tbuf, "ISO 8879-1986", 13) == 0) {
1348 	  if (!silently)
1349 	       sderr(E_STANDARD, (UNCH *)0, (UNCH *)0);
1350 	  tbuf[8] = ':';
1351      }
1352 }
1353 
sdname(tbuf,key)1354 static int sdname(tbuf, key)
1355 UNCH *tbuf;
1356 UNCH *key;
1357 {
1358      if (sdparm(tbuf, 0) != NAS1) {
1359 	  sderr(120, (UNCH *)0, (UNCH *)0);
1360 	  return FAIL;
1361      }
1362      if (!matches(tbuf, key)) {
1363 	  sderr(118, tbuf+1, key);
1364 	  return FAIL;
1365      }
1366      return SUCCESS;
1367 }
1368 
sdckname(tbuf,key)1369 static int sdckname(tbuf, key)
1370 UNCH *tbuf;
1371 UNCH *key;
1372 {
1373      if (pcbsd.action != NAS1) {
1374 	  sderr(120, (UNCH *)0, (UNCH *)0);
1375 	  return FAIL;
1376      }
1377      if (!matches(tbuf, key)) {
1378 	  sderr(118, tbuf+1, key);
1379 	  return FAIL;
1380      }
1381      return SUCCESS;
1382 }
1383 
1384 /* Parse a SGML declaration parameter.  If lpcb is NULL, pt must be
1385 REFNAMELEN+2 characters long, otherwise at least LITLEN+2 characters
1386 long. LPCB should be NULL if a literal is not allowed. */
1387 
sdparm(pt,lpcb)1388 static int sdparm(pt, lpcb)
1389 UNCH *pt;			/* Token buffer. */
1390 struct parse *lpcb;		/* PCB for literal parse. */
1391 {
1392      for (;;) {
1393 	  parse(&pcbsd);
1394 	  if (pcbsd.action != ISIG)
1395 	       break;
1396 	  sderr(E_SIGNIFICANT, (UNCH *)0, (UNCH *)0);
1397      }
1398      ++parmno;
1399      switch (pcbsd.action) {
1400      case LIT1:
1401 	  if (!lpcb) {
1402 	       sderr(E_BADLIT, (UNCH *)0, (UNCH *)0);
1403 	       REPEATCC;
1404 	       return pcbsd.action = INV_;
1405 	  }
1406 	  parselit(pt, lpcb, REFLITLEN, lex.d.lit);
1407 	  return pcbsd.action;
1408      case LIT2:
1409 	  if (!lpcb) {
1410 	       sderr(E_BADLIT, (UNCH *)0, (UNCH *)0);
1411 	       REPEATCC;
1412 	       return pcbsd.action = INV_;
1413 	  }
1414 	  parselit(pt, lpcb, REFLITLEN, lex.d.lita);
1415 	  return pcbsd.action = LIT1;
1416      case NAS1:
1417 	  parsenm(pt, 1);
1418 	  return pcbsd.action;
1419      case NUM1:
1420 	  parsetkn(pt, NU, REFNAMELEN);
1421 	  return pcbsd.action;
1422      }
1423      return pcbsd.action;
1424 }
1425 
sdinit()1426 VOID sdinit()
1427 {
1428      int i;
1429      /* Shunned character numbers in the reference concrete syntax. */
1430      static UNCH refshun[] = {
1431 	  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
1432 	  19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255
1433 	  };
1434      UNCH **p;
1435      /* A character is magic if it is a non-SGML character used for
1436      some internal purpose in the parser. */
1437      char_flags[EOS] |= CHAR_MAGIC;
1438      char_flags[EOBCHAR] |= CHAR_MAGIC;
1439      char_flags[EOFCHAR] |= CHAR_MAGIC;
1440      char_flags[GENRECHAR] |= CHAR_MAGIC;
1441      char_flags[DELNONCH] |= CHAR_MAGIC;
1442      char_flags[DELCDATA] |= CHAR_MAGIC;
1443      char_flags[DELSDATA] |= CHAR_MAGIC;
1444 
1445      /* Figure out the significant SGML characters. */
1446      for (p = lextabs; *p; p++) {
1447 	  UNCH datclass = (*p)[CANON_DATACHAR];
1448 	  UNCH nonclass = (*p)[CANON_NONSGML];
1449 	  for (i = 0; i < 256; i++)
1450 	       if (!(char_flags[i] & CHAR_MAGIC)
1451 		   && (*p)[i] != datclass && (*p)[i] != nonclass)
1452 		    char_flags[i] |= CHAR_SIGNIFICANT;
1453      }
1454      for (i = 0; i < SIZEOF(refshun); i++)
1455 	  char_flags[refshun[i]] |= CHAR_SHUNNED;
1456      for (i = 0; i < 256; i++)
1457 	  if (ISASCII(i) && iscntrl(i))
1458 	       char_flags[i] |= CHAR_SHUNNED;
1459      bufsalloc();
1460 }
1461 
1462 
1463 static
bufsalloc()1464 VOID bufsalloc()
1465 {
1466      scbs = (struct source *)rmalloc((REFENTLVL+1)*sizeof(struct source));
1467      tbuf = (UNCH *)rmalloc(REFATTSPLEN+REFLITLEN+1);
1468      /* entbuf is used for parsing numeric character references */
1469      entbuf = (UNCH *)rmalloc(REFNAMELEN + 2);
1470 }
1471 
1472 static
bufsrealloc()1473 VOID bufsrealloc()
1474 {
1475      UNS size;
1476 
1477      if (ENTLVL != REFENTLVL)
1478 	  scbs = (struct source *)rrealloc((UNIV)scbs,
1479 					   (ENTLVL+1)*sizeof(struct source));
1480      /* Calculate the size for tbuf. */
1481      size = LITLEN + ATTSPLEN;
1482      if (PILEN > size)
1483 	  size = PILEN;
1484      if (BSEQLEN > size)
1485 	  size = BSEQLEN;
1486      if (size != REFATTSPLEN + REFLITLEN)
1487 	  tbuf = (UNCH *)rrealloc((UNIV)tbuf, size + 1);
1488      if (NAMELEN != REFNAMELEN)
1489 	  entbuf = (UNCH *)rrealloc((UNIV)entbuf, NAMELEN + 2);
1490 }
1491 
1492 
1493 /* Check that the non-SGML characters are compatible with the concrete
1494 syntax and munge the lexical tables accordingly.  If IMPLIED is
1495 non-zero, then the SGML declaration was implied; in this case, don't
1496 give error messages about shunned characters not being declared
1497 non-SGML.  Also make any changes that are required by the NAMING section.
1498 */
1499 
setlexical()1500 static VOID setlexical()
1501 {
1502      int i;
1503      UNCH **p;
1504 
1505      if (nlextoke) {
1506 	  /* Handle characters that were made significant by the
1507 	     NAMING section. */
1508 	  for (i = 0; i < 256; i++)
1509 	       if (nlextoke[i] == NMC || nlextoke[i] == NMS)
1510 		    char_flags[i] |= CHAR_SIGNIFICANT;
1511      }
1512 
1513      for (i = 0; i < 256; i++)
1514 	  if (char_flags[i] & CHAR_SIGNIFICANT) {
1515 	       /* Significant SGML characters musn't be non-SGML. */
1516 	       if (char_flags[i] & CHAR_NONSGML) {
1517 		    UNCH buf[2];
1518 		    buf[0] = i;
1519 		    buf[1] = '\0';
1520 		    sderr(E_NONSGML, buf, (UNCH *)0);
1521 		    char_flags[i] &= ~CHAR_NONSGML;
1522 	       }
1523 	  }
1524 	  else {
1525 	       /* Shunned characters that are not significant SGML characters
1526 		  must be non-SGML. */
1527 	       if ((char_flags[i] & (CHAR_SHUNNED | CHAR_NONSGML))
1528 		   == CHAR_SHUNNED) {
1529 		   sderr(E_SHUNNED, ltous((long)i), (UNCH *)0);
1530 		   char_flags[i] |= CHAR_NONSGML;
1531 	       }
1532 	  }
1533 
1534 
1535      /* Now munge the lexical tables. */
1536      for (p = lextabs; *p; p++) {
1537 	  UNCH nonclass = (*p)[CANON_NONSGML];
1538 	  UNCH datclass = (*p)[CANON_DATACHAR];
1539 	  UNCH nmcclass = (*p)[CANON_NMC];
1540 	  UNCH nmsclass = (*p)[CANON_NMS];
1541 	  UNCH minclass = (*p)[CANON_MIN];
1542 	  for (i = 0; i < 256; i++) {
1543 	       if (char_flags[i] & CHAR_NONSGML) {
1544 		    /* We already know that it's not significant. */
1545 		    if (!(char_flags[i] & CHAR_MAGIC))
1546 			 (*p)[i] = nonclass;
1547 	       }
1548 	       else {
1549 		    if (char_flags[i] & CHAR_MAGIC) {
1550 			 sderr(E_MUSTBENON, ltous((long)i), (UNCH *)0);
1551 		    }
1552 		    else if (!(char_flags[i] & CHAR_SIGNIFICANT))
1553 			 (*p)[i] = datclass;
1554 		    else if (*p == lexmin) {
1555 			 /* If it used to be NONSGML, but its now significant,
1556 			    treat it like a datachar. */
1557 			 if ((*p)[i] == nonclass)
1558 			      (*p)[i] = datclass;
1559 		    }
1560 		    else if (nlextoke
1561 			     /* This relies on the fact that lextoke
1562 				occurs last in lextabs. */
1563 			     && lextoke[i] != nlextoke[i]) {
1564 			 switch (nlextoke[i]) {
1565 			 case NMC:
1566 			      (*p)[i] = nmcclass;
1567 			      break;
1568 			 case NMS:
1569 			      (*p)[i] = nmsclass;
1570 			      break;
1571 			 case INV:
1572 			      /* This will happen if period is not a
1573 				 name character. */
1574 			      (*p)[i] = minclass;
1575 			      break;
1576 			 default:
1577 			      abort();
1578 			 }
1579 		    }
1580 	       }
1581 	  }
1582      }
1583      if (nlextran) {
1584 	  memcpy((UNIV)lextran, (UNIV)nlextran, 256);
1585 	  frem((UNIV)nlextran);
1586      }
1587      if (nlextoke) {
1588 	  frem((UNIV)nlextoke);
1589 	  nlextoke = 0;
1590      }
1591 
1592 }
1593 
1594 /* Munge parse tables so that empty start and end tags are not recognized. */
1595 
noemptytag()1596 static VOID noemptytag()
1597 {
1598      static struct parse *pcbs[] = { &pcbconm, &pcbcone, &pcbconr, &pcbconc };
1599      int i;
1600 
1601      for (i = 0; i < SIZEOF(pcbs); i++) {
1602 	  int maxclass, maxstate;
1603 	  int j, k, act;
1604 	  UNCH *plex = pcbs[i]->plex;
1605 	  UNCH **ptab = pcbs[i]->ptab;
1606 
1607 	  /* Figure out the maximum lexical class. */
1608 	  maxclass = 0;
1609 	  for (j = 0; j < 256; j++)
1610 	       if (plex[j] > maxclass)
1611 		    maxclass = plex[j];
1612 
1613 	  /* Now figure out the maximum state number and at the same time
1614 	     change actions. */
1615 
1616 	  maxstate = 0;
1617 
1618 	  for (j = 0; j <= maxstate; j += 2) {
1619 	       for (k = 0; k <= maxclass; k++)
1620 		    if (ptab[j][k] > maxstate)
1621 			 maxstate = ptab[j][k];
1622 	       /* If the '>' class has an empty start or end tag action,
1623 		  change it to the action that the NMC class has. */
1624 	       act = ptab[j + 1][plex['>']];
1625 	       if (act == NET_ || act == NST_)
1626 		    ptab[j + 1][plex['>']] = ptab[j + 1][plex['_']];
1627 	  }
1628      }
1629 }
1630 
1631 /* Lookup the value of the entry in pmap PTR whose key is KEY. */
1632 
pmaplookup(ptr,key)1633 static UNIV pmaplookup(ptr, key)
1634 struct pmap *ptr;
1635 char *key;
1636 {
1637      for (; ptr->name; ptr++)
1638 	  if (strcmp(key, ptr->name) == 0)
1639 	       return ptr->value;
1640      return 0;
1641 }
1642 
1643 /* Return an ASCII representation of N. */
1644 
ltous(n)1645 static UNCH *ltous(n)
1646 long n;
1647 {
1648      static char buf[sizeof(long)*3 + 2];
1649      sprintf(buf, "%ld", n);
1650      return (UNCH *)buf;
1651 }
1652 
sgmlwrsd(fp)1653 VOID sgmlwrsd(fp)
1654 FILE *fp;
1655 {
1656      int i;
1657      int changed;
1658      char *p;
1659      char uc[256];		/* upper case characters (with different lower
1660 				   case characters) */
1661      char lcletter[256];	/* LC letters: a-z */
1662 
1663      fprintf(fp, "<!SGML \"%s\"\n", standard);
1664      fprintf(fp,
1665 	     "CHARSET\nBASESET \"-//Dummy//CHARSET Dummy//%s\"\nDESCSET\n",
1666 	     SYSTEM_CHARSET_DESIGNATING_SEQUENCE);
1667 
1668      if (!done_nonsgml) {
1669 	  done_nonsgml = 1;
1670 	  for (i = 0; i < 256; i++)
1671 	       if ((char_flags[i] & (CHAR_SIGNIFICANT | CHAR_SHUNNED))
1672 		   == CHAR_SHUNNED)
1673 	            char_flags[i] |= CHAR_NONSGML;
1674      }
1675      i = 0;
1676      while (i < 256) {
1677 	  int j;
1678 	  for (j = i + 1; j < 256; j++)
1679 	       if ((char_flags[j] & CHAR_NONSGML)
1680 		   != (char_flags[i] & CHAR_NONSGML))
1681 		    break;
1682 	  if (char_flags[i] & CHAR_NONSGML)
1683 	       fprintf(fp, "%d %d UNUSED\n", i, j - i);
1684 	  else
1685 	       fprintf(fp, "%d %d %d\n", i, j - i, i);
1686 	  i = j;
1687      }
1688      fprintf(fp, "CAPACITY\n");
1689      changed = 0;
1690      for (i = 0; i < NCAPACITY; i++)
1691 	  if (refcapset[i] != sd.capacity[i]) {
1692 	       if (!changed) {
1693 		    fprintf(fp, "SGMLREF\n");
1694 		    changed = 1;
1695 	       }
1696 	       fprintf(fp, "%s %ld\n", captab[i], sd.capacity[i]);
1697 	  }
1698      if (!changed)
1699 	  fprintf(fp, "PUBLIC \"%s\"\n", capset_map[0].name);
1700      fprintf(fp, "SCOPE DOCUMENT\n");
1701 
1702      fprintf(fp, "SYNTAX\nSHUNCHAR");
1703      for (i = 0; i < 256; i++)
1704 	  if (char_flags[i] & CHAR_SHUNNED)
1705 	       break;
1706      if (i == 256)
1707 	  fprintf(fp, " NONE\n");
1708      else {
1709 	  for (; i < 256; i++)
1710 	       if (char_flags[i] & CHAR_SHUNNED)
1711 		    fprintf(fp, " %d", i);
1712 	  fprintf(fp, "\n");
1713      }
1714 
1715      fprintf(fp,
1716 	     "BASESET \"-//Dummy//CHARSET Dummy//%s\"\nDESCSET 0 256 0\n",
1717 	     SYSTEM_CHARSET_DESIGNATING_SEQUENCE);
1718 
1719      fprintf(fp, "FUNCTION\nRE %d\nRS %d\nSPACE %d\nTAB SEPCHAR %d\n",
1720 	     RECHAR, RSCHAR, ' ', TABCHAR);
1721 
1722      MEMZERO((UNIV)uc, 256);
1723      for (i = 0; i < 256; i++)
1724 	  if (lextran[i] != i)
1725 	       uc[lextran[i]] = 1;
1726 
1727      MEMZERO((UNIV)lcletter, 256);
1728      for (p = "abcdefghijklmnopqrstuvwxyz"; *p; p++)
1729 	  lcletter[(unsigned char)*p]= 1;
1730 
1731      fprintf(fp, "NAMING\n");
1732      fputs("LCNMSTRT \"", fp);
1733      for (i = 0; i < 256; i++)
1734 	  if (lextoke[i] == NMS && !uc[i] && !lcletter[i])
1735 	       fprintf(fp, "&#%d;", i);
1736      fputs("\"\n", fp);
1737      fputs("UCNMSTRT \"", fp);
1738      for (i = 0; i < 256; i++)
1739 	  if (lextoke[i] == NMS && !uc[i] && !lcletter[i])
1740 	       fprintf(fp, "&#%d;", lextran[i]);
1741      fputs("\"\n", fp);
1742      fputs("LCNMCHAR \"", fp);
1743      for (i = 0; i < 256; i++)
1744 	  if (lextoke[i] == NMC && !uc[i])
1745 	       fprintf(fp, "&#%d;", i);
1746      fputs("\"\n", fp);
1747      fputs("UCNMCHAR \"", fp);
1748      for (i = 0; i < 256; i++)
1749 	  if (lextoke[i] == NMC && !uc[i])
1750 	       fprintf(fp, "&#%d;", lextran[i]);
1751      fputs("\"\n", fp);
1752 
1753      fprintf(fp, "NAMECASE\nGENERAL %s\nENTITY %s\n",
1754 	     sd.namecase[0] ? "YES" : "NO",
1755 	     sd.namecase[1] ? "YES" : "NO");
1756      fprintf(fp, "DELIM\nGENERAL SGMLREF\nSHORTREF %s\n",
1757 	     sd.shortref ? "SGMLREF" : "NONE");
1758      fprintf(fp, "NAMES SGMLREF\n");
1759      if (newkey) {
1760 	  /* The reference key was saved in newkey. */
1761 	  for (i = 0; i < NKEYS; i++)
1762 	       if (newkey[i][0])
1763 		    fprintf(fp, "%s %s\n", newkey[i], key[i]);
1764      }
1765      fprintf(fp, "QUANTITY SGMLREF\n");
1766      if (quantity_changed)
1767 	  for (i = 0; i < NQUANTITY; i++)
1768 	       if (quantity_changed[i])
1769 		    fprintf(fp, "%s %d\n", quantity_names[i], sd.quantity[i]);
1770      fprintf(fp,
1771 	     "FEATURES\nMINIMIZE\nDATATAG NO OMITTAG %s RANK NO SHORTTAG %s\n",
1772 	     sd.omittag ? "YES" : "NO",
1773 	     sd.shorttag ? "YES" : "NO");
1774      fprintf(fp, "LINK SIMPLE NO IMPLICIT NO EXPLICIT NO\n");
1775      fprintf(fp, "OTHER CONCUR NO ");
1776      if (sd.subdoc > 0)
1777 	  fprintf(fp, "SUBDOC YES %ld ", sd.subdoc);
1778      else
1779 	  fprintf(fp, "SUBDOC NO ");
1780      fprintf(fp, "FORMAL %s\n", sd.formal ? "YES" : "NO");
1781      fprintf(fp, "APPINFO NONE");
1782      fprintf(fp, ">\n");
1783 }
1784 
1785 /* Save an error to be printed only if FORMAL is declared as YES. */
1786 
1787 static
sdsaverr(number,parm1,parm2)1788 VOID sdsaverr(number, parm1, parm2)
1789 UNS number;
1790 UNCH *parm1;
1791 UNCH *parm2;
1792 {
1793      saved_errs[nsaved_errs++] = savmderr(number, parm1, parm2);
1794 }
1795 
1796 /*
1797 Local Variables:
1798 c-indent-level: 5
1799 c-continued-statement-offset: 5
1800 c-brace-offset: -5
1801 c-argdecl-indent: 0
1802 c-label-offset: -5
1803 End:
1804 */
1805