1 #include "sgmlincl.h"         /* #INCLUDE statements for SGML parser. */
2 /* PARSE: Parse a source input stream with specified lexical and state tables.
3           Return to caller with action code.
4 */
parse(pcb)5 int parse(pcb)
6 struct parse *pcb;            /* Current parse control block. */
7 {
8      int rc;                  /* Return code from ENTREF. */
9 
10      while (1) {
11           NEWCC;
12           pcb->input = pcb->plex[*FPOS];
13           pcb->state = pcb->newstate;
14           pcb->newstate = (*(pcb->ptab + pcb->state)) [pcb->input];
15           pcb->action = (*(pcb->ptab + pcb->state + 1)) [pcb->input];
16           TRACEPCB(pcb);
17           switch (pcb->action) {
18           case RC2_:          /* Back up two characters. */
19                REPEATCC;
20           case RCC_:          /* Repeat current character. */
21                REPEATCC;
22           case NOP_:          /* No action necessary.*/
23                continue;
24 
25           case RS_:           /* Record start: ccnt=0; ++rcnt.*/
26                ++RCNT; CTRSET(RSCC);
27                continue;
28 
29           case GET_:          /* EOB or dull EOS or EE found: keep going.*/
30                if (entget()==-1) {pcb->action = EOD_; break;}/* Signal if EOD.*/
31                continue;
32 
33           case EOF_:          /* Illegal entity end; return EE_. */
34                synerr(E_EOF, pcb);
35                pcb->action = EE_;
36           case EE_:           /* Important EOS or EE found: return to caller.*/
37                if (entget()==-1) pcb->action = EOD_;   /* Signal if EOD. */
38                break;
39 
40           case PER_:          /* Parameter entity reference. */
41                REPEATCC;           /* Use PERO as 1st char of entity name. */
42                parsenm(entbuf, ENTCASE);
43                parse(&pcbref);     /* Handle REFC or other terminator. */
44                rc = entref(entbuf);
45                if (rc==ENTPI) {pcb->action = PIE_; break;}
46                continue;
47 
48           case ER_:           /* General entity reference; continue. */
49                parsenm(entbuf, ENTCASE);
50                parse(&pcbref);     /* Handle REFC or other terminator. */
51 	       rc = entref(entbuf);
52                if (rc==ENTDATA) {pcb->action = DEF_; break;}
53                if (rc==ENTPI) {pcb->action = PIE_; break;}
54                continue;
55 
56 
57           case PEX_:          /* Parameter entity reference; return. */
58                REPEATCC;           /* Use PERO as 1st char of entity name. */
59           case ERX_:          /* General entity reference; return. */
60                parsenm(entbuf, ENTCASE);
61                parse(&pcbref);     /* Handle REFC or other terminator. */
62                rc = entref(entbuf);
63                if (rc == ENTDATA){
64 		    /* Reference to external data/subdoc entity in replaceable
65 		       character data. */
66 		    if (BITON(entdatsw, NDECONT)) {
67 			 switch (((PNE)data)->nextype) {
68 			 case ESNCDATA:
69 			 case ESNSDATA:
70 			      /* The standard says `non-SGML data entity'
71 				 but the amendment should have changed it
72 				 to `external data entity'. */
73 			      synerr(145, pcb);
74 			      break;
75 			 case ESNNDATA:
76 			 case ESNSUB:
77 			      /* This is definitely illegal. */
78 			      synerr(141, pcb);
79 			      break;
80 			 }
81 			 entdatsw = 0;
82 			 continue;
83 		    }
84 		    pcb->action = DEF_;
85 	       }
86                else if (rc == ENTPI) {
87 		    /* Reference to PI entity not allowed in replaceable
88 		       character data. */
89 		    synerr(59, pcb);
90 		    entpisw = 0;
91 		    continue;
92 	       }
93                else if (rc) pcb->action = EE_;
94                break;
95 
96           case CRN_:          /* Character reference: numeric. */
97                parsetkn(entbuf, NU, NAMELEN);
98                parse(&pcbref);     /* Handle reference terminator. */
99                pcb->action = charrefn(entbuf, pcb);
100                if (pcb->action==CRN_) continue;   /* Invalid reference */
101                break;
102 
103           case CRA_:           /* Character reference: alphabetic. */
104                parsenm(entbuf, NAMECASE);
105                parse(&pcbref);     /* Handle reference terminator. */
106                charrefa(entbuf);
107 	       if (docelsw) synerr(232, pcb);
108                continue;
109 
110           case SYS_:          /* Invalid NONCHAR: send msg and ignore. */
111                synerr(E_SYS, pcb);
112 	       if (*FPOS == DELNONCH) NEWCC;
113                continue;
114 
115           case NON_:	      /* Valid NONCHAR: prefix and shift encoding. */
116                synerr(60, pcb);
117 	       pcb->action = datachar(*FPOS, pcb);
118                break;
119 	  case NSC_:
120                synerr(60, pcb);
121 	       NEWCC;
122 	       nonchbuf[1] = *FPOS;
123 	       pcb->action = NON_;
124 	       break;
125           case PCI_:          /* Previous character was invalid (INV_). */
126                REPEATCC;
127           case INV_:          /* Markup ended by invalid char; repeat char. */
128                synerr(9, pcb);
129                REPEATCC;
130                break;
131 
132           case LNR_:          /* Previous char exceeded len; back up to it. */
133                REPEATCC;
134           case LEN_:          /* Token too long; ignore excess character. */
135                synerr(3, pcb);
136                continue;
137 
138           case RCR_:          /* Repeat current char and return to caller. */
139                REPEATCC;
140           default:            /* Actions for specific parse. */
141                break;
142           }
143           return (int)pcb->action;
144      }
145 }
146 /* CHARREFA: Resolve an alphabetical reference to a function character
147              and put the character in the read buffer.
148              If reference is bad, issue an error message.
149 */
charrefa(r)150 VOID charrefa(r)
151 UNCH *r;                      /* Undelimited char ref (with length and EOS). */
152 {
153      UNCH thechar;
154 
155      thechar = mapsrch(funtab, r+1);
156      if (thechar == 0)
157 	  synerr(62, &pcbref);
158      else {
159           /* This isn't ideal, because the character position will still
160 	     be wrong for one line. */
161 	  if (thechar == RSCHAR) RCNT--;
162 	  setcurchar(thechar);
163           REPEATCC;
164      }
165 }
166 
167 /* Make the current character ch. */
168 
setcurchar(ch)169 VOID setcurchar(ch)
170 int ch;
171 {
172      /* If we're reading directly from an internal entity, we can't
173 	change the entity, since the entity might be referenced again.
174 	So in this case we copy the entity.  This is inefficient, but
175 	it will only happen in a case like this:
176 
177 	<!entity % amp "&">
178 	<!entity e "x%amp;#SPACE;">
179 
180 	Usually character references will have been processed while the
181 	entity was being defined.  */
182      if (*FPOS != ch) {
183 	  if (!FILESW && !COPIEDSW) {
184 	       UNCH *s = savestr(FBUF + 1);
185 	       FPOS = s + (FPOS - FBUF - 1);
186 	       FBUF = s - 1;
187 	       COPIEDSW = 1;
188 	  }
189 	  *FPOS = ch;
190      }
191 }
192 
193 /* CHARREFN: Resolve a numeric character reference.
194              If reference is bad, issue an error message.
195 */
196 
charrefn(r,pcb)197 int charrefn(r, pcb)
198 UNCH *r;                      /* Undelimited character reference. */
199 struct parse *pcb;            /* Current parse control block. */
200 {
201      int thechar;
202 
203      thechar = atoi((char *)r);
204      if (thechar<0 || thechar>255) {
205           synerr(61, &pcbref);
206           return((int)pcb->action);
207      }
208      return datachar(thechar, pcb);
209 }
210 
211 /* Return ch as a datachar.  If this a non-SGML character which might
212 confuse the parser, shift it to a code that won't and place it in a
213 special buffer which has DELNONCH in the preceding byte.  Otherwise
214 put it the read buffer. */
215 
datachar(ch,pcb)216 int datachar(ch, pcb)
217 int ch;
218 struct parse *pcb;
219 {
220      switch (ch) {
221      case EOS:
222      case EOFCHAR:
223      case EOBCHAR:
224      case GENRECHAR:
225      case DELCDATA:
226      case DELSDATA:
227      case DELNONCH:
228 	  /* A potentially confusing character which must be prefixed
229 	     with DELNONCH. */
230           nonchbuf[1] = SHIFTNON((UNCH)ch);
231           return NON_;
232      }
233      setcurchar(ch);
234      /* If in content, return DCE_ for element content, DAF_ for mixed.  */
235      /* If not content, it must be a literal parse, so return MLA_. */
236      if (pcb == conpcb) {
237 	  if (pcb == &pcbcone)
238 	       return DCE_;
239 	  else {
240 	       data = FPOS;
241 	       /* Action for DAF_ will do REPEATCC. */
242 	       NEWCC;
243 	       return DAF_;
244 	  }
245      }
246      else
247 	  return MLA_;
248 }
249 /* INITATT: Initialize al with adl. */
250 
initatt(adl)251 VOID initatt(adl)
252 struct ad *adl;
253 {
254      notadn = 0;              /* No NOTATION attribute yet. */
255      conrefsw = 0;            /* Assume no content reference att. */
256      /* Copy attribute definition list as a template. */
257      memcpy((UNIV)al, (UNIV)adl, (1+ADN(adl))*ADSZ);
258 }
259 
260 /* PARSEATT: Parse attribute specification list.
261              Make a current copy of the attribute definition list
262              and update it with the user's specifications.
263              Indicate each attribute that was specified in the
264              list (as opposed to defaulted) by setting the ASPEC flag.
265              If no attributes were specified, return NULL.  Otherwise,
266              if in the prolog, make a permanent copy of the list and
267              return its pointer.  If not in the prolog, return al.
268 */
parseatt(adl,pt)269 struct ad *parseatt(adl, pt)
270 struct ad *adl;               /* Attribute definition list. */
271 UNCH *pt;                     /* Tokenization area: tbuf[TAGLEN+ATTSPLEN]. */
272 {
273      UNCH *antvptr;
274      UNCH *nm = 0;            /* Pointer to saved name in tbuf (with length). */
275      int adn = -1;            /* Position of attribute in list (-1=empty). */
276      UNCH *tbuflim = pt + ATTSPLEN;
277      mdessv = es;             /* Save es for checking entity nesting. */
278      initatt(adl);
279      while (pt<=tbuflim) {
280           parse(&pcbstag);
281           switch (pcbstag.action) {
282           case NVS:                     /* Att name or value token found. */
283                parsenm(pt, NAMECASE);   /* Case translation wanted on name. */
284                pt += *(nm = pt);        /* Save name while pointing past it. */
285                continue;
286 
287           case AVD:           /* Delimited value found. */
288           case AVDA:          /* Delimited value found (alternate delimiter). */
289                /* Find position (adn) of saved attribute name in list. */
290                adn = anmget((int)ADN(al), nm);
291                parselit(pt,
292 			(adn == 0 || ADTYPE(al, adn) == ACHARS)
293 			? &pcblitr
294 			: &pcblitt,
295 			LITLEN,
296 			(pcbstag.action==AVD) ? lex.d.lit : lex.d.lita);
297 	       if (adn == 0) {
298                     /* Error: unrecognized attribute name. */
299                     sgmlerr(13, &pcbstag, nm+1, pt);
300                     continue;
301                }
302                /* Tokenize and validate value; let it default if an error. */
303                /* Put value in list and bump ptr by the normalized length
304                   (which is always >= the actual length). */
305                if (!attval(1, pt, adn, adl)) pt += ADLEN(al,adn);
306 	       continue;
307           case AVU:           /* Attribute value found: undelimited. */
308 	       if (!sd.shorttag) sgmlerr(196, &pcbstag, (UNCH *)0, (UNCH *)0);
309 	       parsetkn(pt, NMC, LITLEN);
310                /* Find position (adn) of saved attribute name in list. */
311                if ((adn = anmget((int)ADN(al), nm))==0) {
312                     /* Error: unrecognized attribute name. */
313                     sgmlerr(13, &pcbstag, nm+1, pt);
314                     continue;
315                }
316                /* Tokenize and validate value; let it default if an error. */
317                /* Put value in list and bump ptr by the normalized length
318                   (which is always >= the actual length). */
319                if (!attval(1, pt, adn, adl)) pt += ADLEN(al,adn);
320                continue;
321 
322           case NASV:          /* Saved NVS was really an NTV. */
323                REPEATCC;           /* Put back next token starter. */
324                pt = nm;            /* Back up to NVS. */
325           case NTV:           /* Name token value found. */
326 	       if (!sd.shorttag) sgmlerr(195, &pcbstag, (UNCH *)0, (UNCH *)0);
327                if (pcbstag.action==NTV) parsenm(pt, NAMECASE);
328                if ((adn = antvget((int)ADN(al), pt, &antvptr))==0) {
329                     /* Error: unrecognized name token value. */
330                     sgmlerr(74, &pcbstag, pt+1, (UNCH *)0);
331                     continue;
332                }
333                /* Validate value; let it default if an error. */
334                /* Put value in list and bump ptr by the normalized length
335                   (which is always >= the actual length). */
336                if (!attval(0, antvptr+1, adn, adl)) pt += ADLEN(al,adn);
337                continue;
338 
339           default:            /* All attributes have been parsed. */
340                REPEATCC;      /* Put next char back for tag close parse. */
341                break;
342           }
343           break;
344      }
345      if (pt>tbuflim) synerr(75, &pcbstag);
346      if (es!=mdessv) synerr(37, &pcbstag);
347      if (adn<0) return((struct ad *)0); /* List was empty. */
348      TRACEADL(al);
349      return al;
350 }
351 /* ATTVAL: Validate a specified attribute value.  Issue a message if it is
352            the wrong type (or otherwise is not up to spec), and use the default.
353            Call PARSEVAL to tokenize the value, unless it is a CDATA string.
354            If the attribute is a group, the value is a string.
355            For other types, the token count is set by PARSEVAL if the value
356            is syntactically correct.  If incorrect (or if CDATA) the token
357            count is zero (i.e., the value is a string).
358            The length of a token does not include the length byte, and
359            there is no EOS.  A string length (as always) includes both
360            the length byte and the EOS.
361            If it is a CONREF attribute, set a switch for STAG().
362            If it is a CURRENT attribute, store the value as the new default.
363 */
364 #define DEFVAL adl[adn].addef /* Default value of current attribute. */
365 #define DEFNUM adl[adn].adnum /* Default group size of current attribute. */
366 #define DEFLEN adl[adn].adlen /* Length of default value of current attribute.*/
attval(mtvsw,adval,adn,adl)367 int attval(mtvsw, adval, adn, adl)
368 int mtvsw;                    /* Must tokenize value: 1=yes; 0=no. */
369 UNCH *adval;                  /* Untokenized attribute value. */
370 int adn;                      /* Attribute's position in list. */
371 struct ad *adl;               /* Element's master att def list. */
372 {
373      int errcode;             /* Value/declaration conflict error code. */
374 
375      if (GET(ADFLAGS(al,adn), ASPEC))      /* Can't respecify same attribute. */
376           {sgmlerr(73, &pcbstag, ADNAME(al,adn), adval); return(1);}
377      SET(ADFLAGS(al,adn), ASPEC);          /* Indicate att was specified. */
378      if (GET(ADFLAGS(al,adn), ACONREF))    /* If attribute is content reference: */
379           conrefsw = TAGREF;            /* Set switch for STAG(). */
380      if (mtvsw && ADTYPE(al,adn)!=ACHARS) {
381           /* If no syntax errors, check for proper group membership. */
382           if ( ((errcode = parseval(adval, ADTYPE(al,adn), lbuf))==0)
383             && GET(ADFLAGS(al,adn), AGROUP)
384             && !amemget(&al[adn], ADNUM(al,adn), lbuf) ) errcode = 18;
385           /* If syntax or group membership error, send message and exit. */
386           if (errcode) {
387                sgmlerr(errcode, &pcbstag, ADNAME(al,adn), adval);
388                SET(ADFLAGS(al,adn), AERROR);
389                return(1);
390           }
391           /* Replace specified value in adval with tokenized in lbuf. */
392 	  ustrcpy(adval, lbuf);
393           if (BITOFF(ADFLAGS(al,adn), AGROUP)) ADNUM(al,adn) = (UNCH)tokencnt;
394      }
395      if (!mtvsw)
396 	  adval--;
397      /* If attribute is FIXED, specified value must equal default. */
398      if (BITON(ADFLAGS(al,adn), AFIXED) && ustrcmp(adval, DEFVAL)) {
399 	  /* Since the value has been tokenized, don't use it in the
400 	     error message. */
401           sgmlerr(67, &pcbstag, ADNAME(al,adn), (UNCH *)0);
402           SET(ADFLAGS(al,adn), AERROR);
403           return(1);
404      }
405      ADLEN(al,adn) = vallen(ADTYPE(al,adn), ADNUM(al,adn), adval);
406      if (ADLEN(al,adn) > LITLEN) {
407 	  sgmlerr(224, &pcbstag, ADNAME(al,adn), (UNCH *)0);
408 	  SET(ADFLAGS(al,adn), AERROR);
409 	  return 1;
410      }
411      ADVAL(al,adn) = adval;
412      /* If attribute is CURRENT, value is new default.*/
413      if (GET(ADFLAGS(al,adn), ACURRENT)) {
414           if (ADLEN(al,adn)>DEFLEN) {
415                ds.attdef += (ADLEN(al,adn) - DEFLEN);
416                DEFLEN = ADLEN(al,adn);
417           }
418           DEFVAL = replace(DEFVAL, ADVAL(al,adn));
419           DEFNUM = ADNUM(al,adn);
420      }
421      return(0);                   /* Indicate value was valid. */
422 }
423 /* ADLVAL: Validate the completed attribute definition list (defaults plus
424            specified values).  Issue a message if an
425            attribute is required or current and its value is NULL.
426 */
adlval(adsz,newetd)427 VOID adlval(adsz, newetd)
428 int adsz;                     /* Size of list. */
429 struct etd *newetd;           /* Element type definition for this element. */
430 {
431      int adn = 1;             /* Position in list. */
432      UNCH *npt, *pt;          /* Ptr save areas. */
433      UNCH nptsv;              /* Save area for ptr value (length?). */
434      struct dcncb *dpt;       /* Save area for dcncb ptr. */
435 
436      aentctr = 0;             /* Number of AENTITY tokens in this att list. */
437      idrctr = 0;              /* Number of IDREF tokens in this att list. */
438      do {
439           if (ADVAL(al,adn)==NULL) {                      /* NULL value */
440                if (GET(ADFLAGS(al,adn), AREQ+ACURRENT)) { /*Error if REQ, CURRENT*/
441                     sgmlerr(19, &pcbstag, ADNAME(al,adn), (UNCH *)0);
442                     SET(ADFLAGS(al,adn), AINVALID);
443                }
444           }
445           else switch (ADTYPE(al,adn)) {
446           case AENTITY:       /* Return data ecb pointer if valid entity. */
447                aenttst(adn, ADVAL(al,adn));
448                break;
449           case AENTITYS:      /* Return data ecb pointers if valid entities. */
450                pt = ADVAL(al,adn);
451                tokencnt = (int)ADNUM(al,adn);
452                while (tokencnt--) {
453                     nptsv = *(npt = pt + *pt+1);
454                     *pt += 2; *npt = EOS;
455                     aenttst(adn, pt);
456                     *pt -= 2; *(pt = npt) = nptsv;
457                }
458                break;
459           case AID:
460                /* Define ID; msg if it already exists. */
461 	       if (iddef(ADVAL(al,adn))) {
462 		    sgmlerr(71, &pcbstag, ADNAME(al,adn), ADVAL(al,adn)+1);
463 		    SET(ADFLAGS(al,adn), AINVALID);
464 		    continue;
465 	       }
466 	       ++ds.idcnt;
467                break;
468           case AIDREF:
469                idreftst(adn, ADVAL(al,adn));
470                break;
471           case AIDREFS:
472                pt = ADVAL(al,adn);
473                tokencnt = (int)ADNUM(al,adn);
474                while (tokencnt--) {
475                     nptsv = *(npt = pt + *pt+1);
476                     *pt += 2; *npt = EOS;
477                     idreftst(adn, pt);
478                     *pt -= 2; *(pt = npt) = nptsv;
479                }
480                break;
481           case ANOTEGRP:      /* Return notation identifier. */
482                if (GET(ADFLAGS(al,adn), ASPEC)) notadn = adn;/*NOTATION specified*/
483                if ((dpt = dcnfind(ADVAL(al,adn)))==0) {
484                     sgmlerr(77, &pcbstag, ADNAME(al,adn), ADVAL(al,adn)+1);
485                     SET(ADFLAGS(al,adn), AINVALID);
486                }
487                else ADDATA(al,adn).x = dpt;
488                break;
489           }
490 	  if (!sd.shorttag && !sd.omittag && ADVAL(al,adn)!=NULL
491 	      && !GET(ADFLAGS(al,adn), ASPEC+AINVALID))
492 	       sgmlerr(197, &pcbstag, ADNAME(al,adn), (UNCH *)0);
493      } while ((adn+=BITON(ADFLAGS(al,adn),AGROUP) ? (int)ADNUM(al,adn)+1 : 1)<=adsz);
494 
495      /* Error if NOTATION specified with CONREF attribute or EMPTY element. */
496      if (notadn && (conrefsw
497 		    || (newetd && GET(newetd->etdmod->ttype, MNONE)))) {
498           sgmlerr((UNS)(conrefsw ? 84 : 76), &pcbstag,
499                ADNAME(al,notadn), ADVAL(al,notadn)+1);
500           SET(ADFLAGS(al,notadn), AINVALID);
501      }
502 }
503 /* AENTTST: Validate an individual ENTITY token in AENTITY or AENTITYS value.
504 */
aenttst(adn,pt)505 VOID aenttst(adn, pt)
506 int adn;                      /* Position in list. */
507 UNCH *pt;                     /* Ptr to current ENTITY token in value. */
508 {
509      struct entity *ept;      /* Save area for ecb ptr. */
510 
511      if (++aentctr>GRPCNT) {
512           sgmlerr(136, &pcbstag, ADNAME(al,adn), pt+1);
513           SET(ADFLAGS(al,adn), AINVALID);
514           return;
515      }
516      if ( (ept = entfind(pt))==0
517        && (ecbdeflt==0 || (ept = usedef(pt))==0) ) {
518           sgmlerr(ecbdeflt ? 151 : 72, &pcbstag, ADNAME(al,adn), pt+1);
519           SET(ADFLAGS(al,adn), AINVALID);
520           return;
521      }
522      if (ept->estore==ESX || ept->estore==ESC || ept->estore==ESN) {
523           /* Error if DCN has no notation identifier. */
524           if (ept->estore==ESN && NEXTYPE(ept->etx.n)!=ESNSUB
525 	      && !NEDCNDEFINED(ept->etx.n)) {
526                sgmlerr(78, &pcbstag, NEDCN(ept->etx.n)+1,
527                            pt+1);
528                SET(ADFLAGS(al,adn), AINVALID);
529           }
530      }
531      else {
532           sgmlerr(86, &pcbstag, ADNAME(al,adn), pt+1);
533           SET(ADFLAGS(al,adn), AINVALID);
534      }
535 }
536 /* IDREFTST: Validate an individual IDREF token in an IDREF or IDREFS value.
537 */
idreftst(adn,pt)538 VOID idreftst(adn, pt)
539 int adn;                      /* Position in list. */
540 UNCH *pt;                     /* Ptr to current IDREF token in value. */
541 {
542      struct fwdref *rp;
543      if (++idrctr>GRPCNT) {
544           sgmlerr(70, &pcbstag, ADNAME(al,adn), pt+1);
545           SET(ADFLAGS(al,adn), AINVALID);
546           return;
547      }
548      /* Note IDREF; indicate if ID exists. */
549      if ((rp = idref(pt)) != 0)
550 	  rp->msg = saverr(69, &pcbstag, ADNAME(al,adn), pt+1);
551      ++ds.idrcnt;
552 }
553 /* ANMGET: Locate an attribute name in an attribute definition list.
554 */
anmget(adsz,nm)555 int anmget(adsz, nm)
556 int adsz;                     /* Size of list. */
557 UNCH *nm;                     /* Value to be found (with length byte). */
558 {
559      int adn = 0;             /* Position in list. */
560 
561      while (++adn <= adsz && ustrcmp(nm+1, ADNAME(al,adn))) {
562           if (BITON(ADFLAGS(al,adn), AGROUP)) adn += (int)ADNUM(al,adn);
563      }
564      return (adn > adsz) ? 0 : adn;
565 }
566 /* ANTVGET: Find the position of a name token value in an attribute list.
567             Return the position of the attribute definition, or zero
568             if none was found.  Set pp to the value, if non-NULL.
569 */
antvget(adsz,nm,pp)570 int antvget(adsz, nm, pp)
571 int adsz;                     /* Size of list. */
572 UNCH *nm;                     /* Value to be found (with length byte). */
573 UNCH **pp;		      /* Store value here */
574 {
575      int adn = 0;             /* Position in list. */
576 
577      while (++adn<=adsz) {
578           /* Test only name group members. */
579           if (BITON(ADFLAGS(al,adn), AGROUP)) {
580 	       int advn;      /* Position of value in sub-list. */
581                if ((advn = amemget(&al[adn], (int)ADNUM(al,adn), nm))!=0) {
582 		    if (pp)
583 			 *pp = al[adn+advn].adname;
584                     return adn;
585                }
586                adn += (int)ADNUM(al,adn);
587           }
588      }
589      return 0;
590 }
591 /* AMEMGET: Get the position of a member in an attribute name token group.
592             Returns the position, or zero if not found.
593             The length byte is ignored in the comparison so that final
594             form tokens from ATTVAL can be compared to group members.
595 */
amemget(anmtgrp,adsz,nm)596 int amemget(anmtgrp, adsz, nm)
597 struct ad anmtgrp[];          /* Name token group. */
598 int adsz;                     /* Size of group. */
599 UNCH *nm;                     /* Name to be found (with length byte). */
600 {
601      int adn = 0;             /* Position in group. */
602 
603      while ( ++adn<=adsz && ustrncmp(nm+1, anmtgrp[adn].adname+1, (UNS)*nm-1)) ;
604      return (adn>adsz) ? 0 : adn;
605 }
606 /* VALLEN: Returns the length of an attribute value for capacity
607            calculations.  Normally, the length is NORMSEP plus the number
608            of characters.  For tokenized lists, it is NORMSEP,
609            plus the number of characters in the tokens, plus
610            NORMSEP for each token.
611 	   ACHARS and tokenized lists don't have a length byte.
612 
613 */
vallen(type,num,def)614 UNS vallen(type, num, def)
615 int type;                     /* ADTYPE(al,adn) */
616 int num;                      /* ADNUM(al,adn) */
617 UNCH *def;                    /* ADVAL(al,adn) */
618 {
619      if (type == ACHARS)
620 	  return ustrlen(def) + NORMSEP;
621      if (type < ATKNLIST)
622 	  return *def - 2 + NORMSEP;
623      return ustrlen(def) + num * (NORMSEP - 1) + NORMSEP;
624 }
625 /* PARSEGRP: Parse GI names, get their etds, and form an array of pointers
626              to them.  The array is terminated by a NULL pointer.
627              The number of pointers (including the NULL) is returned.
628              The grp buffer must have room for GRPCNT+1 etds.
629 */
parsegrp(grp,pcb,tbuf)630 UNS parsegrp(grp, pcb, tbuf)
631 struct etd *grp[];            /* Buffer for building the group. */
632 struct parse *pcb;            /* Current parse control block. */
633 UNCH *tbuf;
634 {
635      int grpcnt = 0;          /* Number of etds in the group. */
636      int i;
637      int essv = es;           /* Entity stack level when grp started. */
638 
639      while (parse(pcb)!=GRPE && grpcnt<GRPCNT) {
640           switch (pcb->action) {
641           case NAS_:          /* GI name: get its etd for the group. */
642                grp[grpcnt] = etddef(parsenm(tbuf, NAMECASE));
643 	       for (i = 0; i < grpcnt; i++)
644 		    if (grp[i] == grp[grpcnt]) {
645 			 mderr(98, ntoa(grpcnt + 1), grp[grpcnt]->etdgi + 1);
646 			 break;
647 		    }
648 	       if (i == grpcnt)
649 		    grpcnt++;
650                continue;
651 
652           case EE_:           /* Entity ended (correctly or incorrectly). */
653                if (es<essv) {synerr(37, pcb); essv = es;}
654                continue;
655 
656           case PIE_:          /* PI entity reference (invalid). */
657                entpisw = 0;   /* Reset PI entity indicator. */
658                synerr(59, pcb);
659                continue;
660 
661           default:
662                break;
663           }
664           break;
665      }
666      grp[grpcnt++] = 0;       /* NULL pointer indicates end of group. */
667      if (es!=essv) synerr(37, pcb);
668      return grpcnt;           /* Return number of ptrs in group. */
669 }
670 /* PARSNGRP: Parse notation names, get their dcncbs, and form an array of
671              pointers to them.  The array is terminated by a NULL pointer.
672              The number of pointers (including the NULL) is returned.
673              The grp buffer must have room for GRPCNT+1 members.
674 */
parsngrp(grp,pcb,tbuf)675 UNS parsngrp(grp, pcb, tbuf)
676 struct dcncb *grp[];          /* Buffer for building the group. */
677 struct parse  *pcb;           /* Current parse control block. */
678 UNCH *tbuf;
679 {
680      int grpcnt = 0;          /* Number of members in the group. */
681      int i;
682      int essv = es;           /* Entity stack level when grp started. */
683 
684      while (parse(pcb)!=GRPE && grpcnt<GRPCNT) {
685           switch (pcb->action) {
686           case NAS_:          /* Member name: get its control block. */
687                grp[grpcnt] = dcndef(parsenm(tbuf, NAMECASE));
688 	       for (i = 0; i < grpcnt; i++)
689 		    if (grp[i] == grp[grpcnt]) {
690 			 mderr(98, ntoa(grpcnt + 1), grp[grpcnt]->ename + 1);
691 			 break;
692 		    }
693 	       if (i == grpcnt)
694 		    grpcnt++;
695                continue;
696 
697           case EE_:           /* Entity ended (correctly or incorrectly). */
698                if (es<essv) {synerr(37, pcb); essv = es;}
699                continue;
700 
701           case PIE_:          /* PI entity reference (invalid). */
702                entpisw = 0;   /* Reset PI entity indicator. */
703                synerr(59, pcb);
704                continue;
705 
706           default:
707                break;
708           }
709           break;
710      }
711      grp[grpcnt++] = 0;       /* NULL pointer indicates end of group. */
712      if (es!=essv) synerr(37, pcb);
713      return grpcnt;           /* Return number of ptrs in group. */
714 }
715 /* COPYGRP: Allocate storage for a group and copy the group into it.
716 */
copygrp(pg,grpsz)717 PETD *copygrp(pg, grpsz)
718 PETD pg[];                    /* Pointer to a group (array of etd ptrs). */
719 UNS grpsz;                    /* Number of ptrs in grp, including final NULL. */
720 {
721      UNS glen;                /* Group length in characters. */
722      PETD *gnm;               /* Ptr to permanent name group. */
723 
724      if (pg==0) return (PETD *)0;
725      glen = grpsz * sizeof(struct etd *);
726      memcpy( (UNIV)(gnm = (struct etd **)rmalloc(glen)) , (UNIV)pg, glen );
727      return gnm;
728 }
729 /* INGRP: Locate an etd in a name group and return its index+1 (or zero
730           if not found).
731 */
ingrp(pg,ketd)732 int ingrp(pg, ketd)
733 PETD pg[];                    /* Array of pointers to etds. */
734 PETD ketd;                    /* Pointer to etd to be found in group. */
735 {
736      int i = 0;               /* Array index. */
737 
738      while (pg[i]) if (pg[i++]==ketd) return i;
739      return 0;
740 }
741 /* PARSELIT: Parse a delimited string and collect it into a token.
742              Caller supplies buffer, which must be 1 longer than
743              maximum string allowed.
744              Caller also supplies character that delimits the string.
745              TODO: Return 1 if CDATA, SDATA or NONSGML occurred.
746 */
747 #ifdef USE_PROTOTYPES
parselit(UNCH * tbuf,struct parse * pcb,UNS maxlen,UNCH del)748 VOID parselit(UNCH *tbuf, struct parse *pcb, UNS maxlen, UNCH del)
749 #else
750 VOID parselit(tbuf, pcb, maxlen, del)
751 UNCH *tbuf;                   /* Work area for tokenization (parmlen+1). */
752 struct parse *pcb;            /* Current parse control block. */
753 UNS maxlen;                   /* Maximum length of token. */
754 UNCH del;                     /* Literal delimiter: LIT LITA PIC EOS */
755 #endif
756 {
757      UNCH *pt = tbuf;         /* Current pointer into tbuf. */
758      UNCH lexsv = pcb->plex[del];/* Saved value of delimiter in lexical table. */
759      int essv = es;           /* Entity stack level when literal started. */
760      UNCH datadel;            /* Delimiter for CDATA/SDATA entity. */
761      int parmlen = (int)maxlen + 1;  /* Working limit (to be decremented). */
762      int overflow = 0;	      /* Did the buffer overflow? */
763 
764      pcb->plex[del] = pcb->plex == lexlms ? lex.l.litc : lex.l.minlitc;
765 
766      /* The RPR_ action may cause the length of the literal to decrease by
767 	1 (this discards a final space in a minimum literal); so while
768 	building the literal, the length must be allowed to grow to
769 	maxlen + 1. */
770 
771      do {
772           switch (parse(pcb)) {
773                case LP2_:          /* Move 2nd char back to buffer; redo prev.*/
774                     REPEATCC;
775                case LPR_:          /* Move previous char to buffer; REPEATCC; */
776                     REPEATCC;
777                case MLA_:          /* Move character to buffer. */
778 		    if (parmlen <= 0) { overflow = 1; break; }
779                     *pt++ = *FPOS; --parmlen;
780                     continue;
781 
782                case FUN_:          /* Function char found; replace with space.*/
783 		    if (parmlen <= 0) { overflow = 1; break; }
784                     *pt++ = ' '; --parmlen;
785                     continue;
786 
787                case RSM_:          /* Record start: ccnt=0; ++rcnt.*/
788 		    ++RCNT; CTRSET(RSCC);
789 		    if (parmlen <= 0) { overflow = 1; break; }
790                     *pt++ = *FPOS; --parmlen;
791                     continue;
792 
793                case ERX_:          /* Entity reference: cancel LITC delim. */
794                case PEX_:          /* Parameter entity ref: cancel LITC delim.*/
795                     lexlms[del] = lexsv;
796                     continue;
797 
798                case EE_:
799                     if (es<essv) {
800                          synerr(37, pcb);
801                          essv = es;
802                     }
803                     /* If back at top level, re-enable the LITC delimiter. */
804                     if (es==essv) lexlms[del] = lex.l.litc;
805                     continue;
806 
807                case MLE_:          /* Char not allowed in minimum literal. */
808                     synerr(63, pcb);
809                     continue;
810 
811                case DEF_:          /* Data entity: add it to buffer. */
812 		    if (pcb == &pcblitt) {
813 			 int parmlensv = parmlen;
814 			 entdatsw = 0;
815 			 parmlen = tokdata(pt, parmlen);
816 			 if (parmlen < 0)
817 			      break;
818 			 pt += parmlensv - parmlen;
819 			 continue;
820 		    }
821 		    if (parmlen < datalen + 2) {
822 			 entdatsw = 0;
823 			 overflow = 1;
824 			 break;
825 		    }
826 		    parmlen -= datalen + 2;
827                     *pt++ = datadel =
828                          BITON(entdatsw, CDECONT) ? DELCDATA : DELSDATA;
829                     entdatsw = 0;
830                     memcpy( pt , data, datalen );
831                     pt += datalen;
832                     *pt++ = datadel;
833                     continue;
834 
835                case NON_:          /* Non-SGML char (delimited and shifted). */
836 		    if (parmlen < 2) { overflow = 1; break; }
837 		    parmlen -= 2;
838                     memcpy( pt , nonchbuf, 2 );
839                     pt += 2;
840                     continue;
841 
842                case RPR_:          /* Remove character from buffer. */
843                     --pt; ++parmlen;
844                     break;
845 
846                case EOD_:
847                     exiterr(92, pcb);
848 
849                default:
850                     break;
851           }
852           break;
853      } while (!overflow && pcb->action!=TER_);
854 
855      if (parmlen <= 0) {
856 	  --pt;
857 	  overflow = 1;
858      }
859      if (overflow)
860 	  sgmlerr(134, pcb, ntoa((int)maxlen),(UNCH *)0);
861 
862      datalen = (UNS)(pt-tbuf);/* To return PI string to text processor. */
863      *pt++ = EOS;
864      pcb->plex[del] = lexsv;     /* Restore normal delimiter handling. */
865      if (es!=essv) synerr(37, pcb);
866 }
867 
868 /* Handle a data entity in a tokenized attribute value literal.
869 Parmlen is amount of space left.  Return new parmlen. If there's not
870 enough space return -1, and copy up to parmlen + 1 characters.  Only
871 tokenization should be done, not attribute value interpretation. */
872 
tokdata(pt,parmlen)873 int tokdata(pt, parmlen)
874 UNCH *pt;
875 int parmlen;
876 {
877      int skip = (pcblitt.newstate == 0);
878      int i;
879 
880      for (i = 0; parmlen >= 0 && i < datalen; i++) {
881 	  switch (data[i]) {
882 	  case SPCCHAR:
883 	       if (!skip) {
884 		    *pt++ = data[i];
885 		    parmlen--;
886 		    skip = 1;
887 	       }
888 	       break;
889 	  default:
890 	       if (data[i] == DELNONCH) {
891 		    assert(i + 1 < datalen);
892 		    if ((parmlen -= 2) < 0)
893 			 break;
894 		    *pt++ = DELNONCH;
895 		    *pt++ = data[++i];
896 		    skip = 0;
897 	       }
898 	       else {
899 		    *pt++ = data[i];
900 		    parmlen--;
901 		    skip = 0;
902 	       }
903 	       break;
904 	  }
905      }
906      pcblitt.newstate = skip ? 0 : pcblittda;
907      return parmlen;
908 }
909 
910 
911 /* PARSEMD: Parser for markup declarations.
912             It returns a token each time it is called.
913 
914 */
parsemd(pt,namecase,lpcb,tokenlen)915 int parsemd(pt, namecase, lpcb, tokenlen)
916 UNCH *pt;                     /* Token buffer: >=tokenlen+2. */
917 int namecase;                 /* Case translation: ENTCASE NAMECASE AVALCASE. */
918 struct parse *lpcb;           /* Parse control block for literal parse. */
919 UNS tokenlen;                 /* Max length of expected token: NAMELEN LITLEN */
920 {
921      struct parse *pcb;       /* Current parse control block. */
922 
923      pcb = (lpcb) ? &pcbmd : &pcbmdc;  /* If no literal pcb, dcl is comment. */
924 
925      doparse: while (parse(pcb)==EE_)
926           if (es<mdessv) {synerr(37, pcb); mdessv = es;}
927      if (pcb->action==PIE_) { /* PI entity reference not allowed. */
928           entpisw = 0;        /* Reset PI entity indicator. */
929           synerr(59, pcb);
930           goto doparse;
931      }
932      ++parmno;           /* Increment parameter counter. */
933      switch (pcb->action) {
934      case CDR:           /* COM[1] (MINUS) occurred previously. */
935           REPEATCC;
936           return (int)pcb->action;
937      case LIT:           /* Literal: CDATA with LIT delimiter. */
938           parselit(pt, lpcb, tokenlen, lex.d.lit);
939           return (int)pcb->action;
940      case LITE:          /* Literal: CDATA with LITA delimiter. */
941           parselit(pt, lpcb, tokenlen, lex.d.lita);
942           return((int)(pcb->action = LIT));
943      case RNS:           /* Reserved name started (after RNI). */
944           parsenm(pt, NAMECASE);
945           return (int)pcb->action;
946      case NAS:           /* Name started. */
947           if (namecase!=AVALCASE) {
948                parsenm(pt, namecase);
949                return (int)pcb->action;
950           }
951           /* Treat attribute value as name character string. */
952      case NMT:           /* Name token string. */
953           parsetkn(pt, NMC, (int)tokenlen);  /* Get undelimited value. */
954           return (int)pcb->action;
955      case NUM:           /* Number or number token string. */
956           parsetkn(pt, (UNCH)((int)tokenlen<=NAMELEN ? NU:NMC), (int)tokenlen);
957 	  if (tokenlen > NAMELEN) pcb->newstate = 0;
958           return (int)pcb->action;
959      case PENR:
960 	  REPEATCC;
961 	  return (pcb->action = PEN);
962      case EOD_:
963           exiterr(133, pcb);
964           /* EXIT */
965      default:            /* End of declaration. */
966           return (int)pcb->action; /* EMD GRPS MGRP PEN PGRP */
967      }
968 }
969 /* PARSEMOD: If the declared content was a keyword, the token count is zero
970              and it is only necessary to save the type.  Otherwise,
971              collect the outermost token count and model type bytes for a model.
972              The count includes tokens found in nested groups also.
973              After building the model, parse for its occurrence indicator.
974 */
parsemod(dctype)975 struct thdr *parsemod(dctype)
976 int dctype;                        /* Content type (0=model). */
977 {
978      gbuf[0].ttype = (UNCH)dctype; /* Initialize content flags byte. */
979      if (dctype) {gbuf[0].tu.tnum = 0; return gbuf;} /* Return if not model. */
980 
981      gbuf[0].tu.tnum = 0;          /* Don't count 1st group or model header. */
982      gbuf[1].ttype = 0;            /* Initialize 1st group type ... */
983      gbuf[1].tu.tnum = 0;          /* and count. */
984      grplvl = 1;                   /* Content model is 1st level group. */
985      pcbgrcm.newstate = 0;         /* Go parse the model group. */
986      /* Empty group is trapped during syntax parse; other errors return NULL. */
987      if (!parsegcm(&pcbgrcm, &gbuf[1], &gbuf[0])) return (struct thdr *)0;
988      parse(&pcbgrcs);             /* Get the model suffix, if there is one. */
989      switch(pcbgrcs.action) {
990      case OPT:                     /* OPT occurrence indicator for model. */
991           SET(gbuf[1].ttype, TOPT|TXOPT);
992           break;
993      case REP:                     /* REP occurrence indicator for model. */
994           SET(gbuf[1].ttype, TREP|TXREP);
995           break;
996      case OREP:                    /* OREP occurrence indicator for model. */
997           SET(gbuf[1].ttype, TOREP|TXOREP);
998           break;
999      case EE_:
1000 	  if (es < mdessv) {
1001 	       synerr(37, &pcbmd);
1002 	       mdessv = es;
1003 	  }
1004      default:                      /* RCR_: Repeat char and return. */
1005           break;
1006      }
1007      if (sw.swambig) ambig();	   /* Check content model for ambiguity. */
1008      return gbuf;
1009 }
1010 /* PARSEGCM: Collect token headers (struct thdr) into a group (array).
1011              An etd is defined for each GI (if none exists) and its pointer is
1012              stored in the header.  The function is called recursively.
1013 */
parsegcm(pcb,pgh,gbuf)1014 struct thdr *parsegcm(pcb, pgh, gbuf)
1015 struct parse *pcb;                 /* Current parse control block. */
1016 struct thdr *pgh;                  /* Current group header in group buffer. */
1017 struct thdr *gbuf;                 /* Header for outermost group (model). */
1018 {
1019 #define MCON gbuf->ttype           /* Model type (content attributes). */
1020      struct thdr *pg=pgh;          /* Current group token. */
1021      struct thdr *pgsv=pgh;        /* Saved current token for occ indicator. */
1022      int optcnt = 0;               /* Count of optional tokens in group. */
1023      int essv = es;                /* Entity stack level when grp started. */
1024 
1025     while (gbuf->tu.tnum<=GRPGTCNT && pgh->tu.tnum<=GRPCNT && parse(pcb)!=GRPE)
1026      switch (pcb->action) {
1027 
1028      case NAS_:          /* GI name: get its etd and store it. */
1029           ++gbuf->tu.tnum; ++pgh->tu.tnum;
1030           (pgsv = ++pg)->ttype = TTETD;
1031           pg->tu.thetd = etddef(parsenm(tbuf, NAMECASE));
1032           SET(MCON, MGI);
1033           continue;
1034 
1035      case RNS_:          /* Reserved name started (#PCDATA). */
1036           parsenm(tbuf, NAMECASE);
1037           if (ustrcmp(tbuf+1, key[KPCDATA])) {
1038                mderr(116, ntoa(gbuf->tu.tnum), tbuf+1);
1039                return (struct thdr *)0;
1040           }
1041           /* If #PCDATA is the first non-group token, model is a phrase. */
1042           if (!MCON) SET(MCON, MPHRASE);
1043      case DTAG:          /* Data tag template ignored; treat as #PCDATA. */
1044           if (pcb->action==DTAG) SET(pgh->ttype, TTSEQ); /* DTAG is SEQ grp. */
1045           ++gbuf->tu.tnum; ++pgh->tu.tnum;
1046           (++pg)->ttype = TTCHARS+TOREP;/* #PCDATA is OPT and REP. */
1047           pg->tu.thetd = ETDCDATA;
1048           ++optcnt;                     /* Ct opt tokens to see if grp is opt.*/
1049           SET(MCON, MCHARS);
1050           continue;
1051 
1052      case GRP_:          /* Group started. */
1053           ++gbuf->tu.tnum; ++pgh->tu.tnum;
1054           (pgsv = ++pg)->ttype = 0;     /* Type will be set by connector. */
1055           pg->tu.tnum = 0;              /* Group has number instead of etd. */
1056           if (++grplvl>GRPLVL) {
1057                mderr(115, ntoa(gbuf->tu.tnum), (UNCH *)0);
1058                return (struct thdr *)0;
1059           }
1060           pg = parsegcm(pcb, pg, gbuf);
1061           if (!pg) return (struct thdr *)0;
1062           if (GET(pgsv->ttype, TOPT)) ++optcnt;  /* Indicate nested opt grp. */
1063           --grplvl;
1064           continue;
1065 
1066      case OREP:          /* OREP occurrence indicator for current token.*/
1067           SET(pgsv->ttype, TREP|TXREP);
1068                          /* Now treat like OPT. */
1069      case OPT:           /* OPT occurrence indicator for current token. */
1070           SET(pgsv->ttype, TXOPT);
1071           if (GET(pgsv->ttype, TOPT)) continue;  /* Exit if nested opt grp. */
1072           SET(pgsv->ttype, TOPT);
1073           ++optcnt;      /* Count opt tokens to see if grp is optional. */
1074           continue;
1075      case REP:           /* REP occurrence indicator for current token. */
1076           SET(pgsv->ttype, TREP|TXREP);
1077           continue;
1078 
1079      case OR:            /* OR connector found. */
1080           if BITOFF(pgh->ttype, TTAND) SET(pgh->ttype, TTOR);
1081           else if (GET(pgh->ttype, TTAND)!=TTOR)
1082                mderr(55, ntoa(gbuf->tu.tnum), (UNCH *)0);
1083           continue;
1084      case AND:           /* AND connector found. */
1085           if BITOFF(pgh->ttype, TTAND) SET(pgh->ttype, TTAND);
1086           else if (GET(pgh->ttype, TTAND)!=TTAND)
1087                mderr(55, ntoa(gbuf->tu.tnum), (UNCH *)0);
1088           continue;
1089      case SEQ:           /* SEQ connector found. */
1090           if BITOFF(pgh->ttype, TTAND) SET(pgh->ttype, TTSEQ);
1091           else if (GET(pgh->ttype, TTAND)!=TTSEQ)
1092                mderr(55, ntoa(gbuf->tu.tnum), (UNCH *)0);
1093           continue;
1094 
1095      case EE_:           /* Entity ended (correctly or incorrectly). */
1096           if (es<essv) {synerr(37, pcb); essv = es;}
1097           continue;
1098 
1099      case PIE_:          /* PI entity reference (not permitted). */
1100           entpisw = 0;   /* Reset PI entity indicator. */
1101           synerr(59, pcb);
1102           continue;
1103 
1104      default:            /* Syntax errors return in disgrace. */
1105           synerr(37, pcb);
1106           return (struct thdr *)0;
1107      }
1108      if (pgh->tu.tnum>GRPCNT) {
1109           mderr(113, ntoa(gbuf->tu.tnum), (UNCH *)0);
1110           return (struct thdr *)0;
1111      }
1112      if (gbuf->tu.tnum>GRPGTCNT) {
1113           mderr(114, ntoa(gbuf->tu.tnum), (UNCH *)0);
1114           return (struct thdr *)0;
1115      }
1116      if (pgh->tu.tnum==1) SET(pgh->ttype, TTSEQ); /* Unit grp is SEQ. */
1117      /* An optional token in an OR group makes the group optional. */
1118      if (GET(pgh->ttype, TTMASK)==TTOR && optcnt) SET(pgh->ttype, TOPT);
1119      /* If all tokens in any group are optional, so is the group. */
1120      if (pgh->tu.tnum<=optcnt) SET(pgh->ttype, TOPT);
1121 
1122      if (es!=essv) synerr(37, pcb);
1123      return pg;                             /* Return pointer to GRPS token. */
1124 }
1125 /* PARSENM: Parser for SGML names, which can be translated with LEXTRAN.
1126             The input is read from the entity stack.  CC is 1st char of name.
1127             Returns a pointer to the parsed name.
1128 */
parsenm(tbuf,nc)1129 UNCH *parsenm(tbuf, nc)
1130 UNCH *tbuf;                   /* Buffer for name: >=NAMELEN+2. */
1131 int nc;                       /* Namecase translation: 1=yes; 0=no. */
1132 {
1133      UNCH   len;              /* Length of name (incl EOS & length byte). */
1134 
1135      *(tbuf + (len = 1) ) = nc ? lextran[*FPOS] : *FPOS;
1136      while ((NEWCC, (int)lextoke[*FPOS]>=NMC) && (len<NAMELEN)) {
1137           TRACETKN(NMC, lextoke);
1138           if (lextoke[*(tbuf + ++len) = (nc ? lextran[*FPOS] : *FPOS)]==EOB) {
1139                --len;
1140                entget();
1141           }
1142      }
1143      REPEATCC;                       /* Put back the non-token character. */
1144      *(tbuf + ++len) = EOS;          /* Terminate name with standard EOS. */
1145      *tbuf = ++len;                  /* Store length ahead of name. */
1146      return tbuf;
1147 }
1148 /* PARSETKN: Parser for start-tag attribute value tokens.
1149              First character of token is already in *FPOS.
1150              Returns a pointer to the parsed token.
1151 	     Parsed token has EOS but no length byte.
1152 */
1153 #ifdef USE_PROTOTYPES
parsetkn(UNCH * tbuf,UNCH scope,int maxlen)1154 UNCH *parsetkn(UNCH *tbuf, UNCH scope, int maxlen)
1155 #else
1156 UNCH *parsetkn(tbuf, scope, maxlen)
1157 UNCH *tbuf;		      /* Buffer for token: >=maxlen+1. */
1158 UNCH scope;		      /* Minimum lexical class allowed. */
1159 int maxlen;		      /* Maximum length of a token. */
1160 #endif
1161 {
1162      int i = 1;
1163      tbuf[0] = *FPOS;
1164      while (i < maxlen) {
1165 	  NEWCC;
1166 	  if (lextoke[*FPOS] < scope) {
1167 	       REPEATCC;
1168 	       break;
1169 	  }
1170           TRACETKN(scope, lextoke);
1171 	  if (*FPOS == EOBCHAR)
1172 	       entget();
1173 	  else
1174 	       tbuf[i++] = *FPOS;
1175      }
1176      tbuf[i] = EOS;
1177      return tbuf;
1178 }
1179 /* PARSESEQ: Parser for blank sequences (i.e., space and TAB characters ).
1180              First character of sequence is already in *FPOS.
1181 */
parseseq(tbuf,maxlen)1182 VOID parseseq(tbuf, maxlen)
1183 UNCH *tbuf;		      /* Buffer for storing found sequence. */
1184 int maxlen;		      /* Maximum length of a blank sequence. */
1185 {
1186      tbuf[0] = *FPOS;
1187      datalen = 1;
1188      for (;;) {
1189 	  NEWCC;
1190 	  if (*FPOS == EOBCHAR) {
1191 	       entget();
1192 	       continue;
1193 	  }
1194 	  if ((lextoke[*FPOS] != SEP && *FPOS != SPCCHAR)
1195 	      || datalen >= maxlen)
1196 	       break;
1197 	  tbuf[datalen++] = *FPOS;
1198 	  TRACETKN(SEP, lextoke);
1199      }
1200 }
1201 /* S2VALNM: Parser for attribute values that are tokenized like names.
1202             The input is read from a string (hence S ("string") 2 ("to") VALNM).
1203             It stops at the first bad character.
1204             Returns a pointer to the created name.
1205 */
1206 #ifdef USE_PROTOTYPES
s2valnm(UNCH * nm,UNCH * s,UNCH scope,int translate)1207 UNCH *s2valnm(UNCH *nm, UNCH *s, UNCH scope, int translate)
1208 #else
1209 UNCH *s2valnm(nm, s, scope, translate)
1210 UNCH *nm;                     /* Name to be created. */
1211 UNCH *s;                      /* Source string to be parsed as name. */
1212 UNCH scope;                   /* Minimum lexical class allowed. */
1213 int translate;                /* Namecase translation: 1=yes; 0=no. */
1214 #endif
1215 {
1216      UNCH len = 0;            /* Length of name (incl EOS and length). */
1217 
1218      for (; (int)lextoke[*s] >= scope && len < NAMELEN; s++)
1219 	  nm[++len] = translate ? lextran[*s] : *s;
1220      nm[++len] = EOS;         /* Terminate name with standard EOS. */
1221      *nm = ++len;             /* Store length ahead of name. */
1222      return nm;
1223 }
1224 /* PARSEVAL: Parser for attribute values.
1225              The input is read from a string and tokenized in a buffer.
1226              The input is terminated by EOS.
1227              Each token is preceded by its actual length; there is no EOS.
1228              If an error occurs while parsing, or
1229              if a token doesn't conform, set the token count to 0 to show that
1230              value was not tokenized and return the error code.
1231              After successful parse, return buffer length and 0 error code.
1232              The number of tokens found is set in external variable tokencnt.
1233 */
parseval(s,atype,tbuf)1234 int parseval(s, atype, tbuf)
1235 UNCH *s;                      /* Source string to be parsed as token list. */
1236 UNS atype;                    /* Type of token list expected. */
1237 UNCH *tbuf;                   /* Work area for tokenization. */
1238 {
1239      int t;
1240      UNCH *pt = tbuf;
1241 
1242      pcbval.newstate = 0; tokencnt = 0;
1243      while (1) {
1244           for (;;) {
1245                pcbval.input = lextoke[*s];
1246                pcbval.state = pcbval.newstate;
1247                pcbval.newstate = (*(pcbval.ptab + pcbval.state)) [pcbval.input];
1248                pcbval.action = (*(pcbval.ptab + pcbval.state+1)) [pcbval.input];
1249                TRACEVAL(&pcbval, atype, s, tokencnt);
1250 	       if (pcbval.action != NOPA)
1251 		    break;
1252 	       s++;
1253           }
1254 
1255 
1256           switch (pcbval.action) {
1257           case INVA:          /* Invalid character; terminate parse. */
1258                if (*s == '\0') goto alldone;  /* Normal termination. */
1259                tokencnt = 0;  /* Value was not tokenized. */
1260                return(14);
1261           case LENA:          /* Length limit of token exceeded; end parse. */
1262                tokencnt = 0;  /* Value was not tokenized. */
1263                return(15);
1264           default:            /* Token begun: NUMA, NASA, or NMTA. */
1265                break;
1266           }
1267 
1268           ++tokencnt;         /* One token per iteration. */
1269           switch (atype) {
1270           case AENTITY:
1271                if (tokencnt>1) {tokencnt = 0; return(16);}
1272           case AENTITYS:
1273                if (pcbval.action!=NASA) {tokencnt = 0; return(17);}
1274                s2valnm(pt, s, NMC, ENTCASE);
1275                break;
1276 
1277           case AID:
1278           case AIDREF:
1279           case ANAME:
1280           case ANOTEGRP:
1281                if (tokencnt>1) {tokencnt = 0; return(16);}
1282           case AIDREFS:
1283           case ANAMES:
1284                if (pcbval.action!=NASA) {tokencnt = 0; return(17);}
1285                s2valnm(pt, s, NMC, NAMECASE);
1286                break;
1287 
1288           case ANMTGRP:
1289           case ANMTOKE:
1290                if (tokencnt>1) {tokencnt = 0; return(16);}
1291           case ANMTOKES:
1292                /* No test needed because NMTA, NUMA and NASA are all valid. */
1293                s2valnm(pt, s, NMC, NAMECASE);
1294                break;
1295 
1296           case ANUMBER:
1297                if (tokencnt>1) {tokencnt = 0; return(16);}
1298           case ANUMBERS:
1299                if (pcbval.action!=NUMA) {tokencnt = 0; return(17);}
1300                s2valnm(pt, s, NU, NAMECASE);
1301 	       t = lextoke[s[*pt - 2]];
1302 	       if (t == NMS || t == NMC) {tokencnt = 0; return(17);}
1303                break;
1304 
1305           case ANUTOKE:
1306                if (tokencnt>1) {tokencnt = 0; return(16);}
1307           case ANUTOKES:
1308                if (pcbval.action!=NUMA) {tokencnt = 0; return(17);}
1309                s2valnm(pt, s, NMC, NAMECASE);
1310                break;
1311           }
1312 	  *pt -= 2;
1313 	  s += *pt;
1314 	  pt += *pt + 1;
1315      }
1316  alldone:
1317      *pt++ = EOS;
1318      if (*tbuf == '\0')
1319 	  return 25;
1320      if (atype < ATKNLIST)
1321 	  *tbuf += 2;	      /* include length and EOS */
1322      return 0;
1323 }
1324 /*
1325 Local Variables:
1326 c-indent-level: 5
1327 c-continued-statement-offset: 5
1328 c-brace-offset: -5
1329 c-argdecl-indent: 0
1330 c-label-offset: -5
1331 comment-column: 30
1332 End:
1333 */
1334