1 #include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */
2 /* PARSE: Parse a source input stream with specified lexical and state tables.
3 Return to caller with action code.
4 */
parse(pcb)5 int parse(pcb)
6 struct parse *pcb; /* Current parse control block. */
7 {
8 int rc; /* Return code from ENTREF. */
9
10 while (1) {
11 NEWCC;
12 pcb->input = pcb->plex[*FPOS];
13 pcb->state = pcb->newstate;
14 pcb->newstate = (*(pcb->ptab + pcb->state)) [pcb->input];
15 pcb->action = (*(pcb->ptab + pcb->state + 1)) [pcb->input];
16 TRACEPCB(pcb);
17 switch (pcb->action) {
18 case RC2_: /* Back up two characters. */
19 REPEATCC;
20 case RCC_: /* Repeat current character. */
21 REPEATCC;
22 case NOP_: /* No action necessary.*/
23 continue;
24
25 case RS_: /* Record start: ccnt=0; ++rcnt.*/
26 ++RCNT; CTRSET(RSCC);
27 continue;
28
29 case GET_: /* EOB or dull EOS or EE found: keep going.*/
30 if (entget()==-1) {pcb->action = EOD_; break;}/* Signal if EOD.*/
31 continue;
32
33 case EOF_: /* Illegal entity end; return EE_. */
34 synerr(E_EOF, pcb);
35 pcb->action = EE_;
36 case EE_: /* Important EOS or EE found: return to caller.*/
37 if (entget()==-1) pcb->action = EOD_; /* Signal if EOD. */
38 break;
39
40 case PER_: /* Parameter entity reference. */
41 REPEATCC; /* Use PERO as 1st char of entity name. */
42 parsenm(entbuf, ENTCASE);
43 parse(&pcbref); /* Handle REFC or other terminator. */
44 rc = entref(entbuf);
45 if (rc==ENTPI) {pcb->action = PIE_; break;}
46 continue;
47
48 case ER_: /* General entity reference; continue. */
49 parsenm(entbuf, ENTCASE);
50 parse(&pcbref); /* Handle REFC or other terminator. */
51 rc = entref(entbuf);
52 if (rc==ENTDATA) {pcb->action = DEF_; break;}
53 if (rc==ENTPI) {pcb->action = PIE_; break;}
54 continue;
55
56
57 case PEX_: /* Parameter entity reference; return. */
58 REPEATCC; /* Use PERO as 1st char of entity name. */
59 case ERX_: /* General entity reference; return. */
60 parsenm(entbuf, ENTCASE);
61 parse(&pcbref); /* Handle REFC or other terminator. */
62 rc = entref(entbuf);
63 if (rc == ENTDATA){
64 /* Reference to external data/subdoc entity in replaceable
65 character data. */
66 if (BITON(entdatsw, NDECONT)) {
67 switch (((PNE)data)->nextype) {
68 case ESNCDATA:
69 case ESNSDATA:
70 /* The standard says `non-SGML data entity'
71 but the amendment should have changed it
72 to `external data entity'. */
73 synerr(145, pcb);
74 break;
75 case ESNNDATA:
76 case ESNSUB:
77 /* This is definitely illegal. */
78 synerr(141, pcb);
79 break;
80 }
81 entdatsw = 0;
82 continue;
83 }
84 pcb->action = DEF_;
85 }
86 else if (rc == ENTPI) {
87 /* Reference to PI entity not allowed in replaceable
88 character data. */
89 synerr(59, pcb);
90 entpisw = 0;
91 continue;
92 }
93 else if (rc) pcb->action = EE_;
94 break;
95
96 case CRN_: /* Character reference: numeric. */
97 parsetkn(entbuf, NU, NAMELEN);
98 parse(&pcbref); /* Handle reference terminator. */
99 pcb->action = charrefn(entbuf, pcb);
100 if (pcb->action==CRN_) continue; /* Invalid reference */
101 break;
102
103 case CRA_: /* Character reference: alphabetic. */
104 parsenm(entbuf, NAMECASE);
105 parse(&pcbref); /* Handle reference terminator. */
106 charrefa(entbuf);
107 if (docelsw) synerr(232, pcb);
108 continue;
109
110 case SYS_: /* Invalid NONCHAR: send msg and ignore. */
111 synerr(E_SYS, pcb);
112 if (*FPOS == DELNONCH) NEWCC;
113 continue;
114
115 case NON_: /* Valid NONCHAR: prefix and shift encoding. */
116 synerr(60, pcb);
117 pcb->action = datachar(*FPOS, pcb);
118 break;
119 case NSC_:
120 synerr(60, pcb);
121 NEWCC;
122 nonchbuf[1] = *FPOS;
123 pcb->action = NON_;
124 break;
125 case PCI_: /* Previous character was invalid (INV_). */
126 REPEATCC;
127 case INV_: /* Markup ended by invalid char; repeat char. */
128 synerr(9, pcb);
129 REPEATCC;
130 break;
131
132 case LNR_: /* Previous char exceeded len; back up to it. */
133 REPEATCC;
134 case LEN_: /* Token too long; ignore excess character. */
135 synerr(3, pcb);
136 continue;
137
138 case RCR_: /* Repeat current char and return to caller. */
139 REPEATCC;
140 default: /* Actions for specific parse. */
141 break;
142 }
143 return (int)pcb->action;
144 }
145 }
146 /* CHARREFA: Resolve an alphabetical reference to a function character
147 and put the character in the read buffer.
148 If reference is bad, issue an error message.
149 */
charrefa(r)150 VOID charrefa(r)
151 UNCH *r; /* Undelimited char ref (with length and EOS). */
152 {
153 UNCH thechar;
154
155 thechar = mapsrch(funtab, r+1);
156 if (thechar == 0)
157 synerr(62, &pcbref);
158 else {
159 /* This isn't ideal, because the character position will still
160 be wrong for one line. */
161 if (thechar == RSCHAR) RCNT--;
162 setcurchar(thechar);
163 REPEATCC;
164 }
165 }
166
167 /* Make the current character ch. */
168
setcurchar(ch)169 VOID setcurchar(ch)
170 int ch;
171 {
172 /* If we're reading directly from an internal entity, we can't
173 change the entity, since the entity might be referenced again.
174 So in this case we copy the entity. This is inefficient, but
175 it will only happen in a case like this:
176
177 <!entity % amp "&">
178 <!entity e "x%amp;#SPACE;">
179
180 Usually character references will have been processed while the
181 entity was being defined. */
182 if (*FPOS != ch) {
183 if (!FILESW && !COPIEDSW) {
184 UNCH *s = savestr(FBUF + 1);
185 FPOS = s + (FPOS - FBUF - 1);
186 FBUF = s - 1;
187 COPIEDSW = 1;
188 }
189 *FPOS = ch;
190 }
191 }
192
193 /* CHARREFN: Resolve a numeric character reference.
194 If reference is bad, issue an error message.
195 */
196
charrefn(r,pcb)197 int charrefn(r, pcb)
198 UNCH *r; /* Undelimited character reference. */
199 struct parse *pcb; /* Current parse control block. */
200 {
201 int thechar;
202
203 thechar = atoi((char *)r);
204 if (thechar<0 || thechar>255) {
205 synerr(61, &pcbref);
206 return((int)pcb->action);
207 }
208 return datachar(thechar, pcb);
209 }
210
211 /* Return ch as a datachar. If this a non-SGML character which might
212 confuse the parser, shift it to a code that won't and place it in a
213 special buffer which has DELNONCH in the preceding byte. Otherwise
214 put it the read buffer. */
215
datachar(ch,pcb)216 int datachar(ch, pcb)
217 int ch;
218 struct parse *pcb;
219 {
220 switch (ch) {
221 case EOS:
222 case EOFCHAR:
223 case EOBCHAR:
224 case GENRECHAR:
225 case DELCDATA:
226 case DELSDATA:
227 case DELNONCH:
228 /* A potentially confusing character which must be prefixed
229 with DELNONCH. */
230 nonchbuf[1] = SHIFTNON((UNCH)ch);
231 return NON_;
232 }
233 setcurchar(ch);
234 /* If in content, return DCE_ for element content, DAF_ for mixed. */
235 /* If not content, it must be a literal parse, so return MLA_. */
236 if (pcb == conpcb) {
237 if (pcb == &pcbcone)
238 return DCE_;
239 else {
240 data = FPOS;
241 /* Action for DAF_ will do REPEATCC. */
242 NEWCC;
243 return DAF_;
244 }
245 }
246 else
247 return MLA_;
248 }
249 /* INITATT: Initialize al with adl. */
250
initatt(adl)251 VOID initatt(adl)
252 struct ad *adl;
253 {
254 notadn = 0; /* No NOTATION attribute yet. */
255 conrefsw = 0; /* Assume no content reference att. */
256 /* Copy attribute definition list as a template. */
257 memcpy((UNIV)al, (UNIV)adl, (1+ADN(adl))*ADSZ);
258 }
259
260 /* PARSEATT: Parse attribute specification list.
261 Make a current copy of the attribute definition list
262 and update it with the user's specifications.
263 Indicate each attribute that was specified in the
264 list (as opposed to defaulted) by setting the ASPEC flag.
265 If no attributes were specified, return NULL. Otherwise,
266 if in the prolog, make a permanent copy of the list and
267 return its pointer. If not in the prolog, return al.
268 */
parseatt(adl,pt)269 struct ad *parseatt(adl, pt)
270 struct ad *adl; /* Attribute definition list. */
271 UNCH *pt; /* Tokenization area: tbuf[TAGLEN+ATTSPLEN]. */
272 {
273 UNCH *antvptr;
274 UNCH *nm = 0; /* Pointer to saved name in tbuf (with length). */
275 int adn = -1; /* Position of attribute in list (-1=empty). */
276 UNCH *tbuflim = pt + ATTSPLEN;
277 mdessv = es; /* Save es for checking entity nesting. */
278 initatt(adl);
279 while (pt<=tbuflim) {
280 parse(&pcbstag);
281 switch (pcbstag.action) {
282 case NVS: /* Att name or value token found. */
283 parsenm(pt, NAMECASE); /* Case translation wanted on name. */
284 pt += *(nm = pt); /* Save name while pointing past it. */
285 continue;
286
287 case AVD: /* Delimited value found. */
288 case AVDA: /* Delimited value found (alternate delimiter). */
289 /* Find position (adn) of saved attribute name in list. */
290 adn = anmget((int)ADN(al), nm);
291 parselit(pt,
292 (adn == 0 || ADTYPE(al, adn) == ACHARS)
293 ? &pcblitr
294 : &pcblitt,
295 LITLEN,
296 (pcbstag.action==AVD) ? lex.d.lit : lex.d.lita);
297 if (adn == 0) {
298 /* Error: unrecognized attribute name. */
299 sgmlerr(13, &pcbstag, nm+1, pt);
300 continue;
301 }
302 /* Tokenize and validate value; let it default if an error. */
303 /* Put value in list and bump ptr by the normalized length
304 (which is always >= the actual length). */
305 if (!attval(1, pt, adn, adl)) pt += ADLEN(al,adn);
306 continue;
307 case AVU: /* Attribute value found: undelimited. */
308 if (!sd.shorttag) sgmlerr(196, &pcbstag, (UNCH *)0, (UNCH *)0);
309 parsetkn(pt, NMC, LITLEN);
310 /* Find position (adn) of saved attribute name in list. */
311 if ((adn = anmget((int)ADN(al), nm))==0) {
312 /* Error: unrecognized attribute name. */
313 sgmlerr(13, &pcbstag, nm+1, pt);
314 continue;
315 }
316 /* Tokenize and validate value; let it default if an error. */
317 /* Put value in list and bump ptr by the normalized length
318 (which is always >= the actual length). */
319 if (!attval(1, pt, adn, adl)) pt += ADLEN(al,adn);
320 continue;
321
322 case NASV: /* Saved NVS was really an NTV. */
323 REPEATCC; /* Put back next token starter. */
324 pt = nm; /* Back up to NVS. */
325 case NTV: /* Name token value found. */
326 if (!sd.shorttag) sgmlerr(195, &pcbstag, (UNCH *)0, (UNCH *)0);
327 if (pcbstag.action==NTV) parsenm(pt, NAMECASE);
328 if ((adn = antvget((int)ADN(al), pt, &antvptr))==0) {
329 /* Error: unrecognized name token value. */
330 sgmlerr(74, &pcbstag, pt+1, (UNCH *)0);
331 continue;
332 }
333 /* Validate value; let it default if an error. */
334 /* Put value in list and bump ptr by the normalized length
335 (which is always >= the actual length). */
336 if (!attval(0, antvptr+1, adn, adl)) pt += ADLEN(al,adn);
337 continue;
338
339 default: /* All attributes have been parsed. */
340 REPEATCC; /* Put next char back for tag close parse. */
341 break;
342 }
343 break;
344 }
345 if (pt>tbuflim) synerr(75, &pcbstag);
346 if (es!=mdessv) synerr(37, &pcbstag);
347 if (adn<0) return((struct ad *)0); /* List was empty. */
348 TRACEADL(al);
349 return al;
350 }
351 /* ATTVAL: Validate a specified attribute value. Issue a message if it is
352 the wrong type (or otherwise is not up to spec), and use the default.
353 Call PARSEVAL to tokenize the value, unless it is a CDATA string.
354 If the attribute is a group, the value is a string.
355 For other types, the token count is set by PARSEVAL if the value
356 is syntactically correct. If incorrect (or if CDATA) the token
357 count is zero (i.e., the value is a string).
358 The length of a token does not include the length byte, and
359 there is no EOS. A string length (as always) includes both
360 the length byte and the EOS.
361 If it is a CONREF attribute, set a switch for STAG().
362 If it is a CURRENT attribute, store the value as the new default.
363 */
364 #define DEFVAL adl[adn].addef /* Default value of current attribute. */
365 #define DEFNUM adl[adn].adnum /* Default group size of current attribute. */
366 #define DEFLEN adl[adn].adlen /* Length of default value of current attribute.*/
attval(mtvsw,adval,adn,adl)367 int attval(mtvsw, adval, adn, adl)
368 int mtvsw; /* Must tokenize value: 1=yes; 0=no. */
369 UNCH *adval; /* Untokenized attribute value. */
370 int adn; /* Attribute's position in list. */
371 struct ad *adl; /* Element's master att def list. */
372 {
373 int errcode; /* Value/declaration conflict error code. */
374
375 if (GET(ADFLAGS(al,adn), ASPEC)) /* Can't respecify same attribute. */
376 {sgmlerr(73, &pcbstag, ADNAME(al,adn), adval); return(1);}
377 SET(ADFLAGS(al,adn), ASPEC); /* Indicate att was specified. */
378 if (GET(ADFLAGS(al,adn), ACONREF)) /* If attribute is content reference: */
379 conrefsw = TAGREF; /* Set switch for STAG(). */
380 if (mtvsw && ADTYPE(al,adn)!=ACHARS) {
381 /* If no syntax errors, check for proper group membership. */
382 if ( ((errcode = parseval(adval, ADTYPE(al,adn), lbuf))==0)
383 && GET(ADFLAGS(al,adn), AGROUP)
384 && !amemget(&al[adn], ADNUM(al,adn), lbuf) ) errcode = 18;
385 /* If syntax or group membership error, send message and exit. */
386 if (errcode) {
387 sgmlerr(errcode, &pcbstag, ADNAME(al,adn), adval);
388 SET(ADFLAGS(al,adn), AERROR);
389 return(1);
390 }
391 /* Replace specified value in adval with tokenized in lbuf. */
392 ustrcpy(adval, lbuf);
393 if (BITOFF(ADFLAGS(al,adn), AGROUP)) ADNUM(al,adn) = (UNCH)tokencnt;
394 }
395 if (!mtvsw)
396 adval--;
397 /* If attribute is FIXED, specified value must equal default. */
398 if (BITON(ADFLAGS(al,adn), AFIXED) && ustrcmp(adval, DEFVAL)) {
399 /* Since the value has been tokenized, don't use it in the
400 error message. */
401 sgmlerr(67, &pcbstag, ADNAME(al,adn), (UNCH *)0);
402 SET(ADFLAGS(al,adn), AERROR);
403 return(1);
404 }
405 ADLEN(al,adn) = vallen(ADTYPE(al,adn), ADNUM(al,adn), adval);
406 if (ADLEN(al,adn) > LITLEN) {
407 sgmlerr(224, &pcbstag, ADNAME(al,adn), (UNCH *)0);
408 SET(ADFLAGS(al,adn), AERROR);
409 return 1;
410 }
411 ADVAL(al,adn) = adval;
412 /* If attribute is CURRENT, value is new default.*/
413 if (GET(ADFLAGS(al,adn), ACURRENT)) {
414 if (ADLEN(al,adn)>DEFLEN) {
415 ds.attdef += (ADLEN(al,adn) - DEFLEN);
416 DEFLEN = ADLEN(al,adn);
417 }
418 DEFVAL = replace(DEFVAL, ADVAL(al,adn));
419 DEFNUM = ADNUM(al,adn);
420 }
421 return(0); /* Indicate value was valid. */
422 }
423 /* ADLVAL: Validate the completed attribute definition list (defaults plus
424 specified values). Issue a message if an
425 attribute is required or current and its value is NULL.
426 */
adlval(adsz,newetd)427 VOID adlval(adsz, newetd)
428 int adsz; /* Size of list. */
429 struct etd *newetd; /* Element type definition for this element. */
430 {
431 int adn = 1; /* Position in list. */
432 UNCH *npt, *pt; /* Ptr save areas. */
433 UNCH nptsv; /* Save area for ptr value (length?). */
434 struct dcncb *dpt; /* Save area for dcncb ptr. */
435
436 aentctr = 0; /* Number of AENTITY tokens in this att list. */
437 idrctr = 0; /* Number of IDREF tokens in this att list. */
438 do {
439 if (ADVAL(al,adn)==NULL) { /* NULL value */
440 if (GET(ADFLAGS(al,adn), AREQ+ACURRENT)) { /*Error if REQ, CURRENT*/
441 sgmlerr(19, &pcbstag, ADNAME(al,adn), (UNCH *)0);
442 SET(ADFLAGS(al,adn), AINVALID);
443 }
444 }
445 else switch (ADTYPE(al,adn)) {
446 case AENTITY: /* Return data ecb pointer if valid entity. */
447 aenttst(adn, ADVAL(al,adn));
448 break;
449 case AENTITYS: /* Return data ecb pointers if valid entities. */
450 pt = ADVAL(al,adn);
451 tokencnt = (int)ADNUM(al,adn);
452 while (tokencnt--) {
453 nptsv = *(npt = pt + *pt+1);
454 *pt += 2; *npt = EOS;
455 aenttst(adn, pt);
456 *pt -= 2; *(pt = npt) = nptsv;
457 }
458 break;
459 case AID:
460 /* Define ID; msg if it already exists. */
461 if (iddef(ADVAL(al,adn))) {
462 sgmlerr(71, &pcbstag, ADNAME(al,adn), ADVAL(al,adn)+1);
463 SET(ADFLAGS(al,adn), AINVALID);
464 continue;
465 }
466 ++ds.idcnt;
467 break;
468 case AIDREF:
469 idreftst(adn, ADVAL(al,adn));
470 break;
471 case AIDREFS:
472 pt = ADVAL(al,adn);
473 tokencnt = (int)ADNUM(al,adn);
474 while (tokencnt--) {
475 nptsv = *(npt = pt + *pt+1);
476 *pt += 2; *npt = EOS;
477 idreftst(adn, pt);
478 *pt -= 2; *(pt = npt) = nptsv;
479 }
480 break;
481 case ANOTEGRP: /* Return notation identifier. */
482 if (GET(ADFLAGS(al,adn), ASPEC)) notadn = adn;/*NOTATION specified*/
483 if ((dpt = dcnfind(ADVAL(al,adn)))==0) {
484 sgmlerr(77, &pcbstag, ADNAME(al,adn), ADVAL(al,adn)+1);
485 SET(ADFLAGS(al,adn), AINVALID);
486 }
487 else ADDATA(al,adn).x = dpt;
488 break;
489 }
490 if (!sd.shorttag && !sd.omittag && ADVAL(al,adn)!=NULL
491 && !GET(ADFLAGS(al,adn), ASPEC+AINVALID))
492 sgmlerr(197, &pcbstag, ADNAME(al,adn), (UNCH *)0);
493 } while ((adn+=BITON(ADFLAGS(al,adn),AGROUP) ? (int)ADNUM(al,adn)+1 : 1)<=adsz);
494
495 /* Error if NOTATION specified with CONREF attribute or EMPTY element. */
496 if (notadn && (conrefsw
497 || (newetd && GET(newetd->etdmod->ttype, MNONE)))) {
498 sgmlerr((UNS)(conrefsw ? 84 : 76), &pcbstag,
499 ADNAME(al,notadn), ADVAL(al,notadn)+1);
500 SET(ADFLAGS(al,notadn), AINVALID);
501 }
502 }
503 /* AENTTST: Validate an individual ENTITY token in AENTITY or AENTITYS value.
504 */
aenttst(adn,pt)505 VOID aenttst(adn, pt)
506 int adn; /* Position in list. */
507 UNCH *pt; /* Ptr to current ENTITY token in value. */
508 {
509 struct entity *ept; /* Save area for ecb ptr. */
510
511 if (++aentctr>GRPCNT) {
512 sgmlerr(136, &pcbstag, ADNAME(al,adn), pt+1);
513 SET(ADFLAGS(al,adn), AINVALID);
514 return;
515 }
516 if ( (ept = entfind(pt))==0
517 && (ecbdeflt==0 || (ept = usedef(pt))==0) ) {
518 sgmlerr(ecbdeflt ? 151 : 72, &pcbstag, ADNAME(al,adn), pt+1);
519 SET(ADFLAGS(al,adn), AINVALID);
520 return;
521 }
522 if (ept->estore==ESX || ept->estore==ESC || ept->estore==ESN) {
523 /* Error if DCN has no notation identifier. */
524 if (ept->estore==ESN && NEXTYPE(ept->etx.n)!=ESNSUB
525 && !NEDCNDEFINED(ept->etx.n)) {
526 sgmlerr(78, &pcbstag, NEDCN(ept->etx.n)+1,
527 pt+1);
528 SET(ADFLAGS(al,adn), AINVALID);
529 }
530 }
531 else {
532 sgmlerr(86, &pcbstag, ADNAME(al,adn), pt+1);
533 SET(ADFLAGS(al,adn), AINVALID);
534 }
535 }
536 /* IDREFTST: Validate an individual IDREF token in an IDREF or IDREFS value.
537 */
idreftst(adn,pt)538 VOID idreftst(adn, pt)
539 int adn; /* Position in list. */
540 UNCH *pt; /* Ptr to current IDREF token in value. */
541 {
542 struct fwdref *rp;
543 if (++idrctr>GRPCNT) {
544 sgmlerr(70, &pcbstag, ADNAME(al,adn), pt+1);
545 SET(ADFLAGS(al,adn), AINVALID);
546 return;
547 }
548 /* Note IDREF; indicate if ID exists. */
549 if ((rp = idref(pt)) != 0)
550 rp->msg = saverr(69, &pcbstag, ADNAME(al,adn), pt+1);
551 ++ds.idrcnt;
552 }
553 /* ANMGET: Locate an attribute name in an attribute definition list.
554 */
anmget(adsz,nm)555 int anmget(adsz, nm)
556 int adsz; /* Size of list. */
557 UNCH *nm; /* Value to be found (with length byte). */
558 {
559 int adn = 0; /* Position in list. */
560
561 while (++adn <= adsz && ustrcmp(nm+1, ADNAME(al,adn))) {
562 if (BITON(ADFLAGS(al,adn), AGROUP)) adn += (int)ADNUM(al,adn);
563 }
564 return (adn > adsz) ? 0 : adn;
565 }
566 /* ANTVGET: Find the position of a name token value in an attribute list.
567 Return the position of the attribute definition, or zero
568 if none was found. Set pp to the value, if non-NULL.
569 */
antvget(adsz,nm,pp)570 int antvget(adsz, nm, pp)
571 int adsz; /* Size of list. */
572 UNCH *nm; /* Value to be found (with length byte). */
573 UNCH **pp; /* Store value here */
574 {
575 int adn = 0; /* Position in list. */
576
577 while (++adn<=adsz) {
578 /* Test only name group members. */
579 if (BITON(ADFLAGS(al,adn), AGROUP)) {
580 int advn; /* Position of value in sub-list. */
581 if ((advn = amemget(&al[adn], (int)ADNUM(al,adn), nm))!=0) {
582 if (pp)
583 *pp = al[adn+advn].adname;
584 return adn;
585 }
586 adn += (int)ADNUM(al,adn);
587 }
588 }
589 return 0;
590 }
591 /* AMEMGET: Get the position of a member in an attribute name token group.
592 Returns the position, or zero if not found.
593 The length byte is ignored in the comparison so that final
594 form tokens from ATTVAL can be compared to group members.
595 */
amemget(anmtgrp,adsz,nm)596 int amemget(anmtgrp, adsz, nm)
597 struct ad anmtgrp[]; /* Name token group. */
598 int adsz; /* Size of group. */
599 UNCH *nm; /* Name to be found (with length byte). */
600 {
601 int adn = 0; /* Position in group. */
602
603 while ( ++adn<=adsz && ustrncmp(nm+1, anmtgrp[adn].adname+1, (UNS)*nm-1)) ;
604 return (adn>adsz) ? 0 : adn;
605 }
606 /* VALLEN: Returns the length of an attribute value for capacity
607 calculations. Normally, the length is NORMSEP plus the number
608 of characters. For tokenized lists, it is NORMSEP,
609 plus the number of characters in the tokens, plus
610 NORMSEP for each token.
611 ACHARS and tokenized lists don't have a length byte.
612
613 */
vallen(type,num,def)614 UNS vallen(type, num, def)
615 int type; /* ADTYPE(al,adn) */
616 int num; /* ADNUM(al,adn) */
617 UNCH *def; /* ADVAL(al,adn) */
618 {
619 if (type == ACHARS)
620 return ustrlen(def) + NORMSEP;
621 if (type < ATKNLIST)
622 return *def - 2 + NORMSEP;
623 return ustrlen(def) + num * (NORMSEP - 1) + NORMSEP;
624 }
625 /* PARSEGRP: Parse GI names, get their etds, and form an array of pointers
626 to them. The array is terminated by a NULL pointer.
627 The number of pointers (including the NULL) is returned.
628 The grp buffer must have room for GRPCNT+1 etds.
629 */
parsegrp(grp,pcb,tbuf)630 UNS parsegrp(grp, pcb, tbuf)
631 struct etd *grp[]; /* Buffer for building the group. */
632 struct parse *pcb; /* Current parse control block. */
633 UNCH *tbuf;
634 {
635 int grpcnt = 0; /* Number of etds in the group. */
636 int i;
637 int essv = es; /* Entity stack level when grp started. */
638
639 while (parse(pcb)!=GRPE && grpcnt<GRPCNT) {
640 switch (pcb->action) {
641 case NAS_: /* GI name: get its etd for the group. */
642 grp[grpcnt] = etddef(parsenm(tbuf, NAMECASE));
643 for (i = 0; i < grpcnt; i++)
644 if (grp[i] == grp[grpcnt]) {
645 mderr(98, ntoa(grpcnt + 1), grp[grpcnt]->etdgi + 1);
646 break;
647 }
648 if (i == grpcnt)
649 grpcnt++;
650 continue;
651
652 case EE_: /* Entity ended (correctly or incorrectly). */
653 if (es<essv) {synerr(37, pcb); essv = es;}
654 continue;
655
656 case PIE_: /* PI entity reference (invalid). */
657 entpisw = 0; /* Reset PI entity indicator. */
658 synerr(59, pcb);
659 continue;
660
661 default:
662 break;
663 }
664 break;
665 }
666 grp[grpcnt++] = 0; /* NULL pointer indicates end of group. */
667 if (es!=essv) synerr(37, pcb);
668 return grpcnt; /* Return number of ptrs in group. */
669 }
670 /* PARSNGRP: Parse notation names, get their dcncbs, and form an array of
671 pointers to them. The array is terminated by a NULL pointer.
672 The number of pointers (including the NULL) is returned.
673 The grp buffer must have room for GRPCNT+1 members.
674 */
parsngrp(grp,pcb,tbuf)675 UNS parsngrp(grp, pcb, tbuf)
676 struct dcncb *grp[]; /* Buffer for building the group. */
677 struct parse *pcb; /* Current parse control block. */
678 UNCH *tbuf;
679 {
680 int grpcnt = 0; /* Number of members in the group. */
681 int i;
682 int essv = es; /* Entity stack level when grp started. */
683
684 while (parse(pcb)!=GRPE && grpcnt<GRPCNT) {
685 switch (pcb->action) {
686 case NAS_: /* Member name: get its control block. */
687 grp[grpcnt] = dcndef(parsenm(tbuf, NAMECASE));
688 for (i = 0; i < grpcnt; i++)
689 if (grp[i] == grp[grpcnt]) {
690 mderr(98, ntoa(grpcnt + 1), grp[grpcnt]->ename + 1);
691 break;
692 }
693 if (i == grpcnt)
694 grpcnt++;
695 continue;
696
697 case EE_: /* Entity ended (correctly or incorrectly). */
698 if (es<essv) {synerr(37, pcb); essv = es;}
699 continue;
700
701 case PIE_: /* PI entity reference (invalid). */
702 entpisw = 0; /* Reset PI entity indicator. */
703 synerr(59, pcb);
704 continue;
705
706 default:
707 break;
708 }
709 break;
710 }
711 grp[grpcnt++] = 0; /* NULL pointer indicates end of group. */
712 if (es!=essv) synerr(37, pcb);
713 return grpcnt; /* Return number of ptrs in group. */
714 }
715 /* COPYGRP: Allocate storage for a group and copy the group into it.
716 */
copygrp(pg,grpsz)717 PETD *copygrp(pg, grpsz)
718 PETD pg[]; /* Pointer to a group (array of etd ptrs). */
719 UNS grpsz; /* Number of ptrs in grp, including final NULL. */
720 {
721 UNS glen; /* Group length in characters. */
722 PETD *gnm; /* Ptr to permanent name group. */
723
724 if (pg==0) return (PETD *)0;
725 glen = grpsz * sizeof(struct etd *);
726 memcpy( (UNIV)(gnm = (struct etd **)rmalloc(glen)) , (UNIV)pg, glen );
727 return gnm;
728 }
729 /* INGRP: Locate an etd in a name group and return its index+1 (or zero
730 if not found).
731 */
ingrp(pg,ketd)732 int ingrp(pg, ketd)
733 PETD pg[]; /* Array of pointers to etds. */
734 PETD ketd; /* Pointer to etd to be found in group. */
735 {
736 int i = 0; /* Array index. */
737
738 while (pg[i]) if (pg[i++]==ketd) return i;
739 return 0;
740 }
741 /* PARSELIT: Parse a delimited string and collect it into a token.
742 Caller supplies buffer, which must be 1 longer than
743 maximum string allowed.
744 Caller also supplies character that delimits the string.
745 TODO: Return 1 if CDATA, SDATA or NONSGML occurred.
746 */
747 #ifdef USE_PROTOTYPES
parselit(UNCH * tbuf,struct parse * pcb,UNS maxlen,UNCH del)748 VOID parselit(UNCH *tbuf, struct parse *pcb, UNS maxlen, UNCH del)
749 #else
750 VOID parselit(tbuf, pcb, maxlen, del)
751 UNCH *tbuf; /* Work area for tokenization (parmlen+1). */
752 struct parse *pcb; /* Current parse control block. */
753 UNS maxlen; /* Maximum length of token. */
754 UNCH del; /* Literal delimiter: LIT LITA PIC EOS */
755 #endif
756 {
757 UNCH *pt = tbuf; /* Current pointer into tbuf. */
758 UNCH lexsv = pcb->plex[del];/* Saved value of delimiter in lexical table. */
759 int essv = es; /* Entity stack level when literal started. */
760 UNCH datadel; /* Delimiter for CDATA/SDATA entity. */
761 int parmlen = (int)maxlen + 1; /* Working limit (to be decremented). */
762 int overflow = 0; /* Did the buffer overflow? */
763
764 pcb->plex[del] = pcb->plex == lexlms ? lex.l.litc : lex.l.minlitc;
765
766 /* The RPR_ action may cause the length of the literal to decrease by
767 1 (this discards a final space in a minimum literal); so while
768 building the literal, the length must be allowed to grow to
769 maxlen + 1. */
770
771 do {
772 switch (parse(pcb)) {
773 case LP2_: /* Move 2nd char back to buffer; redo prev.*/
774 REPEATCC;
775 case LPR_: /* Move previous char to buffer; REPEATCC; */
776 REPEATCC;
777 case MLA_: /* Move character to buffer. */
778 if (parmlen <= 0) { overflow = 1; break; }
779 *pt++ = *FPOS; --parmlen;
780 continue;
781
782 case FUN_: /* Function char found; replace with space.*/
783 if (parmlen <= 0) { overflow = 1; break; }
784 *pt++ = ' '; --parmlen;
785 continue;
786
787 case RSM_: /* Record start: ccnt=0; ++rcnt.*/
788 ++RCNT; CTRSET(RSCC);
789 if (parmlen <= 0) { overflow = 1; break; }
790 *pt++ = *FPOS; --parmlen;
791 continue;
792
793 case ERX_: /* Entity reference: cancel LITC delim. */
794 case PEX_: /* Parameter entity ref: cancel LITC delim.*/
795 lexlms[del] = lexsv;
796 continue;
797
798 case EE_:
799 if (es<essv) {
800 synerr(37, pcb);
801 essv = es;
802 }
803 /* If back at top level, re-enable the LITC delimiter. */
804 if (es==essv) lexlms[del] = lex.l.litc;
805 continue;
806
807 case MLE_: /* Char not allowed in minimum literal. */
808 synerr(63, pcb);
809 continue;
810
811 case DEF_: /* Data entity: add it to buffer. */
812 if (pcb == &pcblitt) {
813 int parmlensv = parmlen;
814 entdatsw = 0;
815 parmlen = tokdata(pt, parmlen);
816 if (parmlen < 0)
817 break;
818 pt += parmlensv - parmlen;
819 continue;
820 }
821 if (parmlen < datalen + 2) {
822 entdatsw = 0;
823 overflow = 1;
824 break;
825 }
826 parmlen -= datalen + 2;
827 *pt++ = datadel =
828 BITON(entdatsw, CDECONT) ? DELCDATA : DELSDATA;
829 entdatsw = 0;
830 memcpy( pt , data, datalen );
831 pt += datalen;
832 *pt++ = datadel;
833 continue;
834
835 case NON_: /* Non-SGML char (delimited and shifted). */
836 if (parmlen < 2) { overflow = 1; break; }
837 parmlen -= 2;
838 memcpy( pt , nonchbuf, 2 );
839 pt += 2;
840 continue;
841
842 case RPR_: /* Remove character from buffer. */
843 --pt; ++parmlen;
844 break;
845
846 case EOD_:
847 exiterr(92, pcb);
848
849 default:
850 break;
851 }
852 break;
853 } while (!overflow && pcb->action!=TER_);
854
855 if (parmlen <= 0) {
856 --pt;
857 overflow = 1;
858 }
859 if (overflow)
860 sgmlerr(134, pcb, ntoa((int)maxlen),(UNCH *)0);
861
862 datalen = (UNS)(pt-tbuf);/* To return PI string to text processor. */
863 *pt++ = EOS;
864 pcb->plex[del] = lexsv; /* Restore normal delimiter handling. */
865 if (es!=essv) synerr(37, pcb);
866 }
867
868 /* Handle a data entity in a tokenized attribute value literal.
869 Parmlen is amount of space left. Return new parmlen. If there's not
870 enough space return -1, and copy up to parmlen + 1 characters. Only
871 tokenization should be done, not attribute value interpretation. */
872
tokdata(pt,parmlen)873 int tokdata(pt, parmlen)
874 UNCH *pt;
875 int parmlen;
876 {
877 int skip = (pcblitt.newstate == 0);
878 int i;
879
880 for (i = 0; parmlen >= 0 && i < datalen; i++) {
881 switch (data[i]) {
882 case SPCCHAR:
883 if (!skip) {
884 *pt++ = data[i];
885 parmlen--;
886 skip = 1;
887 }
888 break;
889 default:
890 if (data[i] == DELNONCH) {
891 assert(i + 1 < datalen);
892 if ((parmlen -= 2) < 0)
893 break;
894 *pt++ = DELNONCH;
895 *pt++ = data[++i];
896 skip = 0;
897 }
898 else {
899 *pt++ = data[i];
900 parmlen--;
901 skip = 0;
902 }
903 break;
904 }
905 }
906 pcblitt.newstate = skip ? 0 : pcblittda;
907 return parmlen;
908 }
909
910
911 /* PARSEMD: Parser for markup declarations.
912 It returns a token each time it is called.
913
914 */
parsemd(pt,namecase,lpcb,tokenlen)915 int parsemd(pt, namecase, lpcb, tokenlen)
916 UNCH *pt; /* Token buffer: >=tokenlen+2. */
917 int namecase; /* Case translation: ENTCASE NAMECASE AVALCASE. */
918 struct parse *lpcb; /* Parse control block for literal parse. */
919 UNS tokenlen; /* Max length of expected token: NAMELEN LITLEN */
920 {
921 struct parse *pcb; /* Current parse control block. */
922
923 pcb = (lpcb) ? &pcbmd : &pcbmdc; /* If no literal pcb, dcl is comment. */
924
925 doparse: while (parse(pcb)==EE_)
926 if (es<mdessv) {synerr(37, pcb); mdessv = es;}
927 if (pcb->action==PIE_) { /* PI entity reference not allowed. */
928 entpisw = 0; /* Reset PI entity indicator. */
929 synerr(59, pcb);
930 goto doparse;
931 }
932 ++parmno; /* Increment parameter counter. */
933 switch (pcb->action) {
934 case CDR: /* COM[1] (MINUS) occurred previously. */
935 REPEATCC;
936 return (int)pcb->action;
937 case LIT: /* Literal: CDATA with LIT delimiter. */
938 parselit(pt, lpcb, tokenlen, lex.d.lit);
939 return (int)pcb->action;
940 case LITE: /* Literal: CDATA with LITA delimiter. */
941 parselit(pt, lpcb, tokenlen, lex.d.lita);
942 return((int)(pcb->action = LIT));
943 case RNS: /* Reserved name started (after RNI). */
944 parsenm(pt, NAMECASE);
945 return (int)pcb->action;
946 case NAS: /* Name started. */
947 if (namecase!=AVALCASE) {
948 parsenm(pt, namecase);
949 return (int)pcb->action;
950 }
951 /* Treat attribute value as name character string. */
952 case NMT: /* Name token string. */
953 parsetkn(pt, NMC, (int)tokenlen); /* Get undelimited value. */
954 return (int)pcb->action;
955 case NUM: /* Number or number token string. */
956 parsetkn(pt, (UNCH)((int)tokenlen<=NAMELEN ? NU:NMC), (int)tokenlen);
957 if (tokenlen > NAMELEN) pcb->newstate = 0;
958 return (int)pcb->action;
959 case PENR:
960 REPEATCC;
961 return (pcb->action = PEN);
962 case EOD_:
963 exiterr(133, pcb);
964 /* EXIT */
965 default: /* End of declaration. */
966 return (int)pcb->action; /* EMD GRPS MGRP PEN PGRP */
967 }
968 }
969 /* PARSEMOD: If the declared content was a keyword, the token count is zero
970 and it is only necessary to save the type. Otherwise,
971 collect the outermost token count and model type bytes for a model.
972 The count includes tokens found in nested groups also.
973 After building the model, parse for its occurrence indicator.
974 */
parsemod(dctype)975 struct thdr *parsemod(dctype)
976 int dctype; /* Content type (0=model). */
977 {
978 gbuf[0].ttype = (UNCH)dctype; /* Initialize content flags byte. */
979 if (dctype) {gbuf[0].tu.tnum = 0; return gbuf;} /* Return if not model. */
980
981 gbuf[0].tu.tnum = 0; /* Don't count 1st group or model header. */
982 gbuf[1].ttype = 0; /* Initialize 1st group type ... */
983 gbuf[1].tu.tnum = 0; /* and count. */
984 grplvl = 1; /* Content model is 1st level group. */
985 pcbgrcm.newstate = 0; /* Go parse the model group. */
986 /* Empty group is trapped during syntax parse; other errors return NULL. */
987 if (!parsegcm(&pcbgrcm, &gbuf[1], &gbuf[0])) return (struct thdr *)0;
988 parse(&pcbgrcs); /* Get the model suffix, if there is one. */
989 switch(pcbgrcs.action) {
990 case OPT: /* OPT occurrence indicator for model. */
991 SET(gbuf[1].ttype, TOPT|TXOPT);
992 break;
993 case REP: /* REP occurrence indicator for model. */
994 SET(gbuf[1].ttype, TREP|TXREP);
995 break;
996 case OREP: /* OREP occurrence indicator for model. */
997 SET(gbuf[1].ttype, TOREP|TXOREP);
998 break;
999 case EE_:
1000 if (es < mdessv) {
1001 synerr(37, &pcbmd);
1002 mdessv = es;
1003 }
1004 default: /* RCR_: Repeat char and return. */
1005 break;
1006 }
1007 if (sw.swambig) ambig(); /* Check content model for ambiguity. */
1008 return gbuf;
1009 }
1010 /* PARSEGCM: Collect token headers (struct thdr) into a group (array).
1011 An etd is defined for each GI (if none exists) and its pointer is
1012 stored in the header. The function is called recursively.
1013 */
parsegcm(pcb,pgh,gbuf)1014 struct thdr *parsegcm(pcb, pgh, gbuf)
1015 struct parse *pcb; /* Current parse control block. */
1016 struct thdr *pgh; /* Current group header in group buffer. */
1017 struct thdr *gbuf; /* Header for outermost group (model). */
1018 {
1019 #define MCON gbuf->ttype /* Model type (content attributes). */
1020 struct thdr *pg=pgh; /* Current group token. */
1021 struct thdr *pgsv=pgh; /* Saved current token for occ indicator. */
1022 int optcnt = 0; /* Count of optional tokens in group. */
1023 int essv = es; /* Entity stack level when grp started. */
1024
1025 while (gbuf->tu.tnum<=GRPGTCNT && pgh->tu.tnum<=GRPCNT && parse(pcb)!=GRPE)
1026 switch (pcb->action) {
1027
1028 case NAS_: /* GI name: get its etd and store it. */
1029 ++gbuf->tu.tnum; ++pgh->tu.tnum;
1030 (pgsv = ++pg)->ttype = TTETD;
1031 pg->tu.thetd = etddef(parsenm(tbuf, NAMECASE));
1032 SET(MCON, MGI);
1033 continue;
1034
1035 case RNS_: /* Reserved name started (#PCDATA). */
1036 parsenm(tbuf, NAMECASE);
1037 if (ustrcmp(tbuf+1, key[KPCDATA])) {
1038 mderr(116, ntoa(gbuf->tu.tnum), tbuf+1);
1039 return (struct thdr *)0;
1040 }
1041 /* If #PCDATA is the first non-group token, model is a phrase. */
1042 if (!MCON) SET(MCON, MPHRASE);
1043 case DTAG: /* Data tag template ignored; treat as #PCDATA. */
1044 if (pcb->action==DTAG) SET(pgh->ttype, TTSEQ); /* DTAG is SEQ grp. */
1045 ++gbuf->tu.tnum; ++pgh->tu.tnum;
1046 (++pg)->ttype = TTCHARS+TOREP;/* #PCDATA is OPT and REP. */
1047 pg->tu.thetd = ETDCDATA;
1048 ++optcnt; /* Ct opt tokens to see if grp is opt.*/
1049 SET(MCON, MCHARS);
1050 continue;
1051
1052 case GRP_: /* Group started. */
1053 ++gbuf->tu.tnum; ++pgh->tu.tnum;
1054 (pgsv = ++pg)->ttype = 0; /* Type will be set by connector. */
1055 pg->tu.tnum = 0; /* Group has number instead of etd. */
1056 if (++grplvl>GRPLVL) {
1057 mderr(115, ntoa(gbuf->tu.tnum), (UNCH *)0);
1058 return (struct thdr *)0;
1059 }
1060 pg = parsegcm(pcb, pg, gbuf);
1061 if (!pg) return (struct thdr *)0;
1062 if (GET(pgsv->ttype, TOPT)) ++optcnt; /* Indicate nested opt grp. */
1063 --grplvl;
1064 continue;
1065
1066 case OREP: /* OREP occurrence indicator for current token.*/
1067 SET(pgsv->ttype, TREP|TXREP);
1068 /* Now treat like OPT. */
1069 case OPT: /* OPT occurrence indicator for current token. */
1070 SET(pgsv->ttype, TXOPT);
1071 if (GET(pgsv->ttype, TOPT)) continue; /* Exit if nested opt grp. */
1072 SET(pgsv->ttype, TOPT);
1073 ++optcnt; /* Count opt tokens to see if grp is optional. */
1074 continue;
1075 case REP: /* REP occurrence indicator for current token. */
1076 SET(pgsv->ttype, TREP|TXREP);
1077 continue;
1078
1079 case OR: /* OR connector found. */
1080 if BITOFF(pgh->ttype, TTAND) SET(pgh->ttype, TTOR);
1081 else if (GET(pgh->ttype, TTAND)!=TTOR)
1082 mderr(55, ntoa(gbuf->tu.tnum), (UNCH *)0);
1083 continue;
1084 case AND: /* AND connector found. */
1085 if BITOFF(pgh->ttype, TTAND) SET(pgh->ttype, TTAND);
1086 else if (GET(pgh->ttype, TTAND)!=TTAND)
1087 mderr(55, ntoa(gbuf->tu.tnum), (UNCH *)0);
1088 continue;
1089 case SEQ: /* SEQ connector found. */
1090 if BITOFF(pgh->ttype, TTAND) SET(pgh->ttype, TTSEQ);
1091 else if (GET(pgh->ttype, TTAND)!=TTSEQ)
1092 mderr(55, ntoa(gbuf->tu.tnum), (UNCH *)0);
1093 continue;
1094
1095 case EE_: /* Entity ended (correctly or incorrectly). */
1096 if (es<essv) {synerr(37, pcb); essv = es;}
1097 continue;
1098
1099 case PIE_: /* PI entity reference (not permitted). */
1100 entpisw = 0; /* Reset PI entity indicator. */
1101 synerr(59, pcb);
1102 continue;
1103
1104 default: /* Syntax errors return in disgrace. */
1105 synerr(37, pcb);
1106 return (struct thdr *)0;
1107 }
1108 if (pgh->tu.tnum>GRPCNT) {
1109 mderr(113, ntoa(gbuf->tu.tnum), (UNCH *)0);
1110 return (struct thdr *)0;
1111 }
1112 if (gbuf->tu.tnum>GRPGTCNT) {
1113 mderr(114, ntoa(gbuf->tu.tnum), (UNCH *)0);
1114 return (struct thdr *)0;
1115 }
1116 if (pgh->tu.tnum==1) SET(pgh->ttype, TTSEQ); /* Unit grp is SEQ. */
1117 /* An optional token in an OR group makes the group optional. */
1118 if (GET(pgh->ttype, TTMASK)==TTOR && optcnt) SET(pgh->ttype, TOPT);
1119 /* If all tokens in any group are optional, so is the group. */
1120 if (pgh->tu.tnum<=optcnt) SET(pgh->ttype, TOPT);
1121
1122 if (es!=essv) synerr(37, pcb);
1123 return pg; /* Return pointer to GRPS token. */
1124 }
1125 /* PARSENM: Parser for SGML names, which can be translated with LEXTRAN.
1126 The input is read from the entity stack. CC is 1st char of name.
1127 Returns a pointer to the parsed name.
1128 */
parsenm(tbuf,nc)1129 UNCH *parsenm(tbuf, nc)
1130 UNCH *tbuf; /* Buffer for name: >=NAMELEN+2. */
1131 int nc; /* Namecase translation: 1=yes; 0=no. */
1132 {
1133 UNCH len; /* Length of name (incl EOS & length byte). */
1134
1135 *(tbuf + (len = 1) ) = nc ? lextran[*FPOS] : *FPOS;
1136 while ((NEWCC, (int)lextoke[*FPOS]>=NMC) && (len<NAMELEN)) {
1137 TRACETKN(NMC, lextoke);
1138 if (lextoke[*(tbuf + ++len) = (nc ? lextran[*FPOS] : *FPOS)]==EOB) {
1139 --len;
1140 entget();
1141 }
1142 }
1143 REPEATCC; /* Put back the non-token character. */
1144 *(tbuf + ++len) = EOS; /* Terminate name with standard EOS. */
1145 *tbuf = ++len; /* Store length ahead of name. */
1146 return tbuf;
1147 }
1148 /* PARSETKN: Parser for start-tag attribute value tokens.
1149 First character of token is already in *FPOS.
1150 Returns a pointer to the parsed token.
1151 Parsed token has EOS but no length byte.
1152 */
1153 #ifdef USE_PROTOTYPES
parsetkn(UNCH * tbuf,UNCH scope,int maxlen)1154 UNCH *parsetkn(UNCH *tbuf, UNCH scope, int maxlen)
1155 #else
1156 UNCH *parsetkn(tbuf, scope, maxlen)
1157 UNCH *tbuf; /* Buffer for token: >=maxlen+1. */
1158 UNCH scope; /* Minimum lexical class allowed. */
1159 int maxlen; /* Maximum length of a token. */
1160 #endif
1161 {
1162 int i = 1;
1163 tbuf[0] = *FPOS;
1164 while (i < maxlen) {
1165 NEWCC;
1166 if (lextoke[*FPOS] < scope) {
1167 REPEATCC;
1168 break;
1169 }
1170 TRACETKN(scope, lextoke);
1171 if (*FPOS == EOBCHAR)
1172 entget();
1173 else
1174 tbuf[i++] = *FPOS;
1175 }
1176 tbuf[i] = EOS;
1177 return tbuf;
1178 }
1179 /* PARSESEQ: Parser for blank sequences (i.e., space and TAB characters ).
1180 First character of sequence is already in *FPOS.
1181 */
parseseq(tbuf,maxlen)1182 VOID parseseq(tbuf, maxlen)
1183 UNCH *tbuf; /* Buffer for storing found sequence. */
1184 int maxlen; /* Maximum length of a blank sequence. */
1185 {
1186 tbuf[0] = *FPOS;
1187 datalen = 1;
1188 for (;;) {
1189 NEWCC;
1190 if (*FPOS == EOBCHAR) {
1191 entget();
1192 continue;
1193 }
1194 if ((lextoke[*FPOS] != SEP && *FPOS != SPCCHAR)
1195 || datalen >= maxlen)
1196 break;
1197 tbuf[datalen++] = *FPOS;
1198 TRACETKN(SEP, lextoke);
1199 }
1200 }
1201 /* S2VALNM: Parser for attribute values that are tokenized like names.
1202 The input is read from a string (hence S ("string") 2 ("to") VALNM).
1203 It stops at the first bad character.
1204 Returns a pointer to the created name.
1205 */
1206 #ifdef USE_PROTOTYPES
s2valnm(UNCH * nm,UNCH * s,UNCH scope,int translate)1207 UNCH *s2valnm(UNCH *nm, UNCH *s, UNCH scope, int translate)
1208 #else
1209 UNCH *s2valnm(nm, s, scope, translate)
1210 UNCH *nm; /* Name to be created. */
1211 UNCH *s; /* Source string to be parsed as name. */
1212 UNCH scope; /* Minimum lexical class allowed. */
1213 int translate; /* Namecase translation: 1=yes; 0=no. */
1214 #endif
1215 {
1216 UNCH len = 0; /* Length of name (incl EOS and length). */
1217
1218 for (; (int)lextoke[*s] >= scope && len < NAMELEN; s++)
1219 nm[++len] = translate ? lextran[*s] : *s;
1220 nm[++len] = EOS; /* Terminate name with standard EOS. */
1221 *nm = ++len; /* Store length ahead of name. */
1222 return nm;
1223 }
1224 /* PARSEVAL: Parser for attribute values.
1225 The input is read from a string and tokenized in a buffer.
1226 The input is terminated by EOS.
1227 Each token is preceded by its actual length; there is no EOS.
1228 If an error occurs while parsing, or
1229 if a token doesn't conform, set the token count to 0 to show that
1230 value was not tokenized and return the error code.
1231 After successful parse, return buffer length and 0 error code.
1232 The number of tokens found is set in external variable tokencnt.
1233 */
parseval(s,atype,tbuf)1234 int parseval(s, atype, tbuf)
1235 UNCH *s; /* Source string to be parsed as token list. */
1236 UNS atype; /* Type of token list expected. */
1237 UNCH *tbuf; /* Work area for tokenization. */
1238 {
1239 int t;
1240 UNCH *pt = tbuf;
1241
1242 pcbval.newstate = 0; tokencnt = 0;
1243 while (1) {
1244 for (;;) {
1245 pcbval.input = lextoke[*s];
1246 pcbval.state = pcbval.newstate;
1247 pcbval.newstate = (*(pcbval.ptab + pcbval.state)) [pcbval.input];
1248 pcbval.action = (*(pcbval.ptab + pcbval.state+1)) [pcbval.input];
1249 TRACEVAL(&pcbval, atype, s, tokencnt);
1250 if (pcbval.action != NOPA)
1251 break;
1252 s++;
1253 }
1254
1255
1256 switch (pcbval.action) {
1257 case INVA: /* Invalid character; terminate parse. */
1258 if (*s == '\0') goto alldone; /* Normal termination. */
1259 tokencnt = 0; /* Value was not tokenized. */
1260 return(14);
1261 case LENA: /* Length limit of token exceeded; end parse. */
1262 tokencnt = 0; /* Value was not tokenized. */
1263 return(15);
1264 default: /* Token begun: NUMA, NASA, or NMTA. */
1265 break;
1266 }
1267
1268 ++tokencnt; /* One token per iteration. */
1269 switch (atype) {
1270 case AENTITY:
1271 if (tokencnt>1) {tokencnt = 0; return(16);}
1272 case AENTITYS:
1273 if (pcbval.action!=NASA) {tokencnt = 0; return(17);}
1274 s2valnm(pt, s, NMC, ENTCASE);
1275 break;
1276
1277 case AID:
1278 case AIDREF:
1279 case ANAME:
1280 case ANOTEGRP:
1281 if (tokencnt>1) {tokencnt = 0; return(16);}
1282 case AIDREFS:
1283 case ANAMES:
1284 if (pcbval.action!=NASA) {tokencnt = 0; return(17);}
1285 s2valnm(pt, s, NMC, NAMECASE);
1286 break;
1287
1288 case ANMTGRP:
1289 case ANMTOKE:
1290 if (tokencnt>1) {tokencnt = 0; return(16);}
1291 case ANMTOKES:
1292 /* No test needed because NMTA, NUMA and NASA are all valid. */
1293 s2valnm(pt, s, NMC, NAMECASE);
1294 break;
1295
1296 case ANUMBER:
1297 if (tokencnt>1) {tokencnt = 0; return(16);}
1298 case ANUMBERS:
1299 if (pcbval.action!=NUMA) {tokencnt = 0; return(17);}
1300 s2valnm(pt, s, NU, NAMECASE);
1301 t = lextoke[s[*pt - 2]];
1302 if (t == NMS || t == NMC) {tokencnt = 0; return(17);}
1303 break;
1304
1305 case ANUTOKE:
1306 if (tokencnt>1) {tokencnt = 0; return(16);}
1307 case ANUTOKES:
1308 if (pcbval.action!=NUMA) {tokencnt = 0; return(17);}
1309 s2valnm(pt, s, NMC, NAMECASE);
1310 break;
1311 }
1312 *pt -= 2;
1313 s += *pt;
1314 pt += *pt + 1;
1315 }
1316 alldone:
1317 *pt++ = EOS;
1318 if (*tbuf == '\0')
1319 return 25;
1320 if (atype < ATKNLIST)
1321 *tbuf += 2; /* include length and EOS */
1322 return 0;
1323 }
1324 /*
1325 Local Variables:
1326 c-indent-level: 5
1327 c-continued-statement-offset: 5
1328 c-brace-offset: -5
1329 c-argdecl-indent: 0
1330 c-label-offset: -5
1331 comment-column: 30
1332 End:
1333 */
1334