1 #include "sgmlincl.h"         /* #INCLUDE statements for SGML parser. */
2 #include "context.h"
3 
#define GI (tags[ts].tetd->etdgi+1)              /* GI of current element. */
#define NEWGI (newetd->etdgi+1)                  /* GI of new tag. */
#define STATUS (*statuspt)    /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
#define PEX (-1)              /* GI is a plus exception and not a minus. */

/* Hit-bit helpers.  "h" is an array of unsigned long holding one bit per
   token of a group; "n" is a 1-based token index.  Bit n-1 lives in word
   (n-1)>>LONGPOW at position (n-1)&(LONGBITS-1).
   Every macro argument is parenthesised so that expression arguments
   (e.g. a conditional expression) select the intended bit, and the mask is
   built from 1UL so that setting the top bit of a word never shifts into
   the sign bit of a signed long.
   "n" is evaluated twice: do not pass an expression with side effects.
*/
#define ANYHIT(h) (grplongs == 1 ? ((h)[0] != 0) : anyhit(h))
#define HITSET(h, n) ((h)[(unsigned)((n)-1)>>LONGPOW] \
		      |= (1UL<<(((n)-1)&(LONGBITS-1))))
#define HITON(h, n) ((h)[(unsigned)((n)-1)>>LONGPOW] \
		     & (1UL<<(((n)-1)&(LONGBITS-1))))

#define HITOFF(h, n) (!(HITON(h, n)))    /* Non-zero if bit n is clear. */

#define TOKENHIT HITON(H,T)   /* Hit bit of the current token in its group. */
17 
18 static
copypos(to,from)19 VOID copypos(to, from)
20 struct mpos *to, *from;
21 {
22      int i;
23      for (i = 0; i <= (int)from[0].t; i++) {
24 	  to[i].g = from[i].g;
25 	  to[i].t = from[i].t;
26 	  memcpy(to[i].h, from[i].h, grplongs*sizeof(unsigned long));
27      }
28 }
29 
30 /* CONTEXT: Determine whether a GI is valid in the present structural context.
            On entry, pos points to the model token to be tested against the
            GI and STATUS (*statuspt) holds that token's status.  Tokens are
            tried one after another until the GI matches, a token turns out
            to be required, or STATUS becomes RCMISS or RCEND.  pos and
            STATUS are updated in place as tokens are passed over.
            Returns:
              RCHIT    the GI matched the current token and mexts<=0;
              RCMEX    mexts>0 and either the GI matched a token that is
                       optional or belongs to an OR group, or the token
                       tested was neither a match nor required;
              RCHITMEX mexts>0 and the GI matched a token that is neither
                       optional nor a member of an OR group;
              RCREQ    a different element is required (mexts!=-1 only);
                       nextetd is set to the required token's ETD;
              RCPEX    mexts==-1 and the GI did not match.  pos is restored
                       from savedpos and STATUS is set to RCPEX, except when
                       STATUS was already RCEND on entry, in which case
                       nothing is touched;
              RCMISS   the GI is totally invalid here;
              RCEND    the element has ended.
            TO DO: Save allowed GIs for an error message on an RCMISS.
                   Support a "query" mode (what is allowed now?) by working
                   with a copy of pos.
*/
context(gi,mod,pos,statuspt,mexts)38 int context(gi, mod, pos, statuspt, mexts)
39 struct etd *gi;               /* ETD of new GI. */
40 struct thdr mod[];            /* Model of current open element. */
41 struct mpos pos[];            /* Position in open element's model. */
42 UNCH *statuspt;               /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
43 int mexts;                    /* >0=stack level of minus grp; -1=plus; 0=none.*/
44 {
45      UNCH toccsv, gtypesv;    /* Save token's TOCC and GTYPE in case grp ends.*/
46 
     /* Plus exception: give up at once if the model has already ended;
        otherwise save pos so that a failed probe can be undone below. */
47      if (mexts == -1) {
48 	  if (STATUS == RCEND)
49 	       return RCPEX;
50 	  copypos(savedpos, pos);
51      }
52      Tstart = T;              /* Save starting token for AND group testing. */
53      while (STATUS!=RCMISS && STATUS!=RCEND) {
54           TRACEGI("CONTEXT", gi, mod, pos, Tstart);
          /* While the current token is itself a group, descend into it:
             the group becomes the new level (its index is the old M), M
             moves on to the next model entry, the new level starts at
             token 1 and its hit bits are cleared. */
55           while (TTYPE==TTOR || TTYPE==TTSEQ || TTYPE==TTAND) {
56                pos[P+1].g = M++; pos[++P].t = 1; HITCLEAR(H);
57                Tstart = T;    /* Save starting token for AND group testing. */
58                TRACEGI("OPENGRP", gi, mod, pos, Tstart);
59           }
60           STATUS = (UNCH)tokenreq(gi, mod, pos);
61           TRACEGI("STATUS", gi, mod, pos, Tstart);
62           if (gi==TOKEN.tu.thetd) {     /* Hit in model. */
63                STATUS = (UNCH)RCHIT;
               /* newtoken() may move to another token or group, so the
                  hit token's group type and occurrence bits are captured
                  first for the return-code decision. */
64                gtypesv = GTYPE; toccsv = TOCC;
65                newtoken(mod, pos, statuspt);
66                return(mexts<=0 ? RCHIT : (gtypesv==TTOR || BITON(toccsv, TOPT))
67                                        ?  RCMEX : RCHITMEX);
68           }
69           if (STATUS==RCREQ) {
	       /* Plus exception: stop probing; pos is restored and RCPEX
		  returned after the loop. */
70 	       if (mexts == -1)
71 		    break;
               /* Otherwise report the required token through nextetd and
                  step past it as though it had been hit. */
72                STATUS = RCHIT;
73                nextetd = TOKEN.tu.thetd;
74                newtoken(mod, pos, statuspt);
75                return(RCREQ);
76           }
77           /* else if (STATUS==RCNREQ) */
               /* The token is not required: with a minus exception the
                  search ends here; otherwise advance and retry the GI. */
78                if (mexts>0) return(RCMEX);
79                newtoken(mod, pos, statuspt);
80      }
     /* No match for a plus exception: undo every change made to pos. */
81      if (mexts == -1) {
82 	  copypos(pos, savedpos);
83 	  return STATUS = RCPEX;
84      }
85      return((int)STATUS);
86 }
87 /* ECONTEXT: Determine whether the current element can be ended, or whether
             non-optional tokens remain at the current level or higher.
             Returns 1 if element can be ended, or 0 if tokens remain.
             On entry, STATUS==RCEND if there are no tokens left; if not,
             pos points to the next model token to be tested.
             Side effects: pos and STATUS are modified while the remaining
             tokens are examined.  nextetd is set to the ETD of the current
             token when that token is an element token (TTETD) inside a
             group (P>1), and to 0 otherwise; it is also cleared when the
             result is a miss on a token that has the TOPT bit.
             TO DO: Support a "query" mode (what is required now?) by working
                    with a copy of pos.
*/
econtext(mod,pos,statuspt)95 int econtext(mod, pos, statuspt)
96 struct thdr mod[];            /* Model of current open element. */
97 struct mpos pos[];            /* Position in open element's model. */
98 UNCH *statuspt;               /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
99 {
100      unsigned next;           /* Position in AND group of next testable token.*/
101 
102      Tstart = T;
103      TRACEEND("ECONT", mod, pos, 0, 0, Tstart);
     /* At the first level there is no group left to end: the element can
        end if the current token was hit or is optional. */
104      if (P<=1) {nextetd = 0; return(TOKENHIT || BITON(TOCC, TOPT));}
105      nextetd = TTYPE == TTETD ? TOKEN.tu.thetd : 0;
106      while (STATUS!=RCMISS && STATUS!=RCEND) {
          /* End as many enclosing groups as possible.  Both switches are 0:
             optional AND members are ignored and this is an end-of-element
             test rather than a search for a new token. */
107           STATUS = (UNCH)testend(mod, pos, 0, 0);
108           TRACEEND("ECONTEND", mod, pos, 0, 0, Tstart);
109           nextetd = P<=1 || TTYPE != TTETD ? 0 : TOKEN.tu.thetd;
110           if (STATUS==RCEND)       return(1);
111           if (P<=1)                return(TOKENHIT || BITON(TOCC, TOPT));
112           if (STATUS==RCMISS) {
113                if (BITON(TOCC, TOPT)) nextetd = 0;
114                return(0);
115           }
          /* A token that is not contextually optional blocks the end. */
116           if (!tokenopt(mod, pos)) return(0);
117 
          /* The current token may be skipped: move to the next sequential
             token or, in an AND group, to the next unhit token after T,
             falling back to the first unhit token in the group. */
118           STATUS = RCNREQ;
119           if (GTYPE!=TTAND) ++T;   /* T!=GNUM or group would have ended. */
120           else T = (UNCH)(((next = (UNS)offbit(H, (int)T, GNUM))!=0) ?
121                next : offbit(H, 0, GNUM));
122 
          /* Make M agree with the new T: skip the entries taken up by the
             first T-1 members of the group, then step onto member T. */
123           M = G + grpsz(&GHDR, (int)T-1) + 1;
124           TRACEEND("ECONTNEW", mod, pos, 0, 0, Tstart);
125      }
126      if (STATUS==RCMISS) {
127           if (BITON(TOCC, TOPT)) nextetd = 0;
128           return(0);
129      }
130      return(1);               /* STATUS==RCEND */
131 }
132 /* NEWTOKEN: Find the next token to test.  pos is advanced in place and
              STATUS is set to indicate the result:
                  RCNREQ if the GI was a hit on a repeatable token (which
                         stays current), or if a new token was selected;
                  RCHIT  if testend() ended a group that is repeatable;
                  RCEND  if element has ended (no more tokens to test);
              and RCMISS if a new token can't be found because current token
              (which was not hit) was neither unconditionally required nor
              optional.
              RCREQ is never stored by this function: STATUS only receives
              RCNREQ, RCMISS, or a value returned by testend().
              On entry STATUS==RCHIT means the GI matched the current token.
*/
newtoken(mod,pos,statuspt)141 VOID newtoken(mod, pos, statuspt)
142 struct thdr mod[];            /* Model of current open element. */
143 struct mpos pos[];            /* Position in open element's model. */
144 UNCH *statuspt;               /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
145 {
146      unsigned nextand = 0;    /* Position in AND group of next testable token.*/
147      int currhit = (STATUS==RCHIT); /* 1=current GI hit; 0=not. */
148 
149      /* If the GI was a hit, turn on the hit bit and set the status to
        assume that the token to be tested against the next GI will
        be non-required.  If the current token is repeatable, exit so
        it will stand as the next token to test.
     */
154      if (STATUS==RCHIT) {
155           HITSET(H, T);
156 	  STATUS = RCNREQ;
157           if (BITON(TOCC, TREP)) return;
158      }
159      /* At this point, we must determine the next token to test:
        either against the next GI, if this one was a hit, or
        against the same GI if conditions permit a retry.
        To find the next token, we must first end the current group,
        if possible, and any we can that contain it.
        If the outermost group was a hit and is repeatable, or
        if the element has ended, we exit now.
        If it hasn't ended, or was optional and ended with a miss,
        we can retry the GI against the next token.
     */
169      if ((STATUS = (UNCH)testend(mod, pos, 1, 1))!=RCNREQ) return;
170 
171      /* At this point, the "current token" is either the original one,
        or the token for the highest level unhit group that it ended.
        We will retry a missed GI, by testing it against the next
        token, if the current token:
        1. Is optional;
        2. Was hit (i.e., because it is repeatable and was hit by a
           previous GI or because it is a hit group that just ended);
        3. Is in an AND or OR group and is not the last testable token.

        It will be the next sequential one (unhit one, in an AND group);
        if there are none left, use the first unhit token in the group.
        In either case, set M to correspond to the new T.

        NOTE(review): nextand is recomputed only when the current group is
        an AND group.  When "retest" is re-entered for a group of another
        type it may still hold a non-zero value from the previous pass,
        which satisfies the test below -- confirm this is intended.
     */
184      retest:
185      TRACEEND("RETEST", mod, pos, (int)nextand, 1, Tstart);
186      if (GTYPE==TTAND) {
          /* Next unhit token after T, else the first unhit one (wrap). */
187           nextand = offbit(H, (int)T, GNUM);
188 	  if (!nextand)
189 	       nextand = offbit(H, 0, GNUM);
190      }
191      if ( BITON(TOCC, TOPT)
192        || TOKENHIT
193        || GTYPE==TTOR              /* T!=GNUM or group would have ended. */
194        || nextand ) {
195           if (GTYPE!=TTAND) ++T;   /* T!=GNUM or group would have ended. */
196           else T = nextand;
          /* Skip the model entries of the first T-1 members to reach the
             entry for member T. */
197           M = G + grpsz(&GHDR, (int)T-1) + 1;
198           if (GTYPE==TTAND) {
199 	       /* If AND group wrapped, it can end if all non-optionals were
		  hit.  testend() is called with andoptsw==0 for that reason.
		  If it popped at least one level (P changed) its result
		  becomes STATUS and, when that is RCNREQ, the search is
		  repeated from the new current token; if no level was
		  popped the GI cannot be placed and STATUS is RCMISS. */
201 	       if (T==Tstart && !currhit) {
202                     UNCH Psave = P;
203                     int rc = testend(mod, pos, 0, 1);
204                     if (Psave!=P) {if ((STATUS = (UNCH)rc)==RCNREQ) goto retest;}
205                     else STATUS = RCMISS;
206                }
207 
208 	       /* We only test unhit tokens, so we must use an unhit token
		  as Tstart (which is used to detect when the AND group has
		  wrapped). */
211 	       else if (HITON(H,Tstart)) Tstart = T;
212 	  }
213      }
214      else STATUS = RCMISS;
215      TRACEEND("NEWTOKEN", mod, pos, (int)nextand, 1, Tstart);
216 }
217 /* TESTEND: End the current group, if possible, and any that it is nested in.
            The current token will either be a group header, or some token
            that could not end its group.  pos is updated in place: each
            group that ends pops one level, and its header becomes the
            current token of the enclosing level.
            Returns:
              RCNREQ if no (further) group could be ended, so the current
                     token stands;
              RCHIT  if newtknsw is set and a group that ended is repeatable;
              RCEND  if the first level was reached and the element is over;
              RCMISS if the first level was reached by ending a group with
                     no hits and the token there is neither optional nor hit.
            The loop treats rc==0 as "keep going"; this assumes none of the
            RC* codes is 0 -- confirm against their definitions.
*/
testend(mod,pos,andoptsw,newtknsw)222 int testend(mod, pos, andoptsw, newtknsw)
223 struct thdr mod[];            /* Model of current open element. */
224 struct mpos pos[];            /* Position in open element's model. */
225 int andoptsw;                 /* 1=test optional AND members; 0=ignore. */
226 int newtknsw;                 /* 1=new token test; 0=end element test. */
227 {
228      int rc = 0;              /* Return code: RCNREQ RCHIT RCMISS RCEND */
229 
230      while (!rc) {
231           TRACEEND("TRACEEND", mod, pos, rc, andoptsw, Tstart);
232           /* TESTMISS:
             If we've hit no tokens yet in the current group, and
             the current token is the last unhit one in the group we can test,
             we will end the group (it may never really have started!)
             because we might be able to try the token that follows it.
             In any group, a token is the last testable unhit token if it
             is the last sequential one, as the GI was already tested against
             the preceding unhit tokens.  In addition,
             in a SEQ group, it is the last testable unhit token if it isn't
             optional, because we can't skip past it to the following ones.
             If we end the group, before popping the level, set M to G, as this
             level's group header will be the next level's current token.
             The group's hit bit is NOT set in this case.
          */
245           if (!ANYHIT(H) && (T==GNUM
246 			     || (GTYPE==TTSEQ && BITOFF(TOCC, TOPT)))) {
247                M = G; --P; Tstart = T;
248                if (P<=1) {
249                     if (BITON(TOCC, TOPT) || TOKENHIT) rc = RCEND;
250                     else                               rc = RCMISS;
251                }
252                continue;
253           }
254           /* TESTHIT:
             See if we've hit all the non-optional tokens in the group.
             If so, pop to the previous level and set the group's hit bit.
             If we were called from NEWTOKEN we are trying to find the token
             to test against the next start-tag, so if the group is repeatable,
             process it again.  (If not, we were called from ECONTEXT and
             are testing whether the element can be ended.)
             Otherwise, if we are at the first level, the element is over.
             A group counts as complete when: it is an OR group whose
             current token was hit; it is a SEQ group positioned on its
             last token and that token was hit or is optional; or it is an
             AND group for which allhit() reports every token hit (optional
             ones included only when andoptsw is set).
          */
263           if ((GTYPE==TTOR  && TOKENHIT)
264 	      || (GTYPE==TTSEQ && T==(UNCH)GNUM
265 		  && (TOKENHIT || BITON(TOCC, TOPT)))
266 	      || (GTYPE==TTAND && allhit(&GHDR, H, 0, andoptsw))) {
267                M = G;
268 	       --P;
	       /* After the pop, H and T refer to the enclosing level, so
		  this marks the group that just ended as hit there. */
269 	       HITSET(H, T);
270 	       Tstart = T;
271                if (newtknsw && BITON(TOCC, TREP)) rc = RCHIT;
272                else if (P<=1)                     rc = RCEND;
273 	       /* If we are looking for a new token to test against the next
		  start-tag, then we need to consider optional AND members
		  in this group, even if we didn't need to consider them
		  in the group that we just ended because that group had
		  wrapped. */
278 	       else if (newtknsw) andoptsw = 1;
279                /* Else loop to test new outer group. */
280           }
281           else rc = RCNREQ;   /* No group ended this time, so return. */
282      }
283      TRACEEND("ENDFOUND", mod, pos, rc, andoptsw, Tstart);
284      return(rc);
285 }
286 /* TOKENOPT: Return 1 if current token is contextually optional;
287              otherwise, return 0.
288 */
tokenopt(mod,pos)289 int tokenopt(mod, pos)
290 struct thdr mod[];            /* Model of current open element. */
291 struct mpos pos[];            /* Position in open element's model. */
292 {
293      TRACEEND("TOKENOPT", mod, pos, 0, 0, Tstart);
294      return (BITON(TOCC, TOPT) /* Inherently optional. */
295 	     || TOKENHIT      /* Was hit (handles "plus" suffix case). */
296 	     || (!ANYHIT(H) && groupopt(mod, pos)));
297 			      /* In optional group with no hits. */
298 }
299 /* GROUPOPT: Temporarily makes the current group be the current token so that
300              TOKENOPT() can be applied to it.  Returns the value returned
301              by TOKENOPT.
302 */
groupopt(mod,pos)303 int groupopt(mod, pos)
304 struct thdr mod[];            /* Model of current open element. */
305 struct mpos pos[];            /* Position in open element's model. */
306 {
307      UNCH saveM;              /* Save M when testing if group is not required.*/
308      int rc;                  /* 1=contextually optional; 0=not. */
309 
310      if (P==1) return(BITON(GOCC, TOPT) || TOKENHIT);
311      saveM = M; M = G; --P;
312      rc = tokenopt(mod, pos);
313      ++P; G = M; M = saveM;
314      return(rc);
315 }
316 /* TOKENREQ: Returns RCREQ if the current token is "contextually required".
317              That is, it is not contextually optional and
318                  1) it is a member of a "seq" group that is either required
319                     or has at least 1 hit token.
320                  2) it is a member of an "and" group in which all other
321                     tokens were hit.
322                           Optional tokens are not counted
323                           if GI is ETDCDATA, as we are looking for an
324                           omitted start-tag.  Otherwise, they are counted,
325                           as the GI might match one of them.
326              Returns RCNREQ if the current token is "not required".
327 */
tokenreq(gi,mod,pos)328 int tokenreq(gi, mod, pos)
329 struct etd *gi;               /* ETD of new GI. */
330 struct thdr mod[];            /* Model of current open element. */
331 struct mpos pos[];            /* Position in open element's model. */
332 {
333      TRACEGI("TOKENREQ", gi, mod, pos, Tstart);
334      return( tokenopt(mod, pos) ? RCNREQ
335             : ( GTYPE==TTSEQ && (ANYHIT(H) || groupreq(gi, mod, pos)==RCREQ)
336 #if 0
337 	       || (GTYPE==TTAND && allhit(&GHDR, H, T, \*gi!=ETDCDATA*\ 1))
338 #endif
339 	       )
340                 ? RCREQ : RCNREQ );
341 }
342 /* GROUPREQ: Temporarily makes the current group be the current token so that
343              TOKENREQ() can be applied to it.  Returns the value returned
344              by TOKENREQ.
345 */
groupreq(gi,mod,pos)346 int groupreq(gi, mod, pos)
347 struct etd *gi;               /* ETD of new GI. */
348 struct thdr mod[];            /* Model of current open element. */
349 struct mpos pos[];            /* Position in open element's model. */
350 {
351      UNCH saveM;              /* Save M when testing if group is not required.*/
352      int rc;                  /* Return code: RCREQ RCNREQ */
353 
354      if (P==1) return(BITOFF(GOCC, TOPT) ? RCREQ : RCNREQ);
355      saveM = M; M = G; --P;
356      rc = tokenreq(gi, mod, pos);
357      ++P; G = M; M = saveM;
358      return(rc);
359 }
360 /* GRPSZ: Returns the number of tokens spanned by a group in the model (M),
361           from the group's start (G) to a specified index within the group (T).
362           M = 0, plus 1 for each token in the group, plus the size of
363           any subgroups (gotten by calling GRPSZ recursively).  On entry,
364           M must be equal to G at the current level.
365 */
grpsz(g,t)366 int grpsz(g, t)
367 struct thdr *g;               /* mod[G]: Ptr to group in the model. */
368 int t;                        /* T: Index of last token in the group. */
369 {
370      struct thdr *p = g;      /* Ptr to current token in the model. */
371      int m = 0;               /* Size of group (including nested groups). */
372      int i = 0;               /* Number of group members (loop counter). */
373      UNS type;                /* Token type (without TOREP bits). */
374 
375      while (++i<=t) {
376           ++p; ++m;
377           type = GET(p->ttype, TTMASK);
378           if (type==TTOR || type==TTSEQ || type==TTAND) {
379                m += grpsz(p, p->tu.tnum);
380                p = g+m;
381           }
382      }
383      return(m);
384 }
385 /* ALLHIT: Returns 1 if all hit bits for the specified group are turned on,
386            (other than those that correspond to optional tokens if "opt" is
387            0) and the "but" bit (all bits if "but" bit is zero).  Otherwise,
388            returns 0.  GRPSZ is used to skip past subgroup tokens.
389 */
allhit(p,hits,but,opt)390 int allhit(p, hits, but, opt)
391 struct thdr *p;               /* mod[G]: Ptr to group in the model. */
392 unsigned long *hits;	      /* H: Hit bits to be tested. */
393 int but;                      /* Index of bit to ignore; 0=test all. */
394 int opt;                      /* 1=optional tokens must be hit; 0=ignore. */
395 {
396      int b = 0;               /* Index of bit being tested in hits. */
397      int e = p->tu.tnum;      /* Ending index (number of bits to test). */
398      unsigned type;           /* Token type (without TOREP bits). */
399 
400      while (++p, ++b<=e) {
401           if (HITOFF(hits,b) && (opt || BITOFF(p->ttype,TOPT)) && b!=but)
402                return 0;
403           if ((type = GET(p->ttype,TTMASK))==TTOR || type==TTSEQ || type==TTAND)
404                p += grpsz(p, p->tu.tnum);
405      }
406      return 1;
407 }
/* OFFBIT: Returns the index of the first unset bit after (i.e., not including)
           the caller's "first" bit, looking no further than the "last" bit.
           Bit indexes are 1-based, so passing 0 as "first" searches from
           the beginning.  If all bits through the specified last bit are
           on, it returns 0.
*/
int offbit(bits, first, last)
unsigned long *bits;          /* Bits to be tested. */
int first;                    /* Index of the bit before the first one tested. */
int last;                     /* Index of last bit to be tested in bits. */
{
     int bit;                 /* Index of the bit being examined. */

     for (bit = first + 1; bit <= last; bit++) {
          if (HITOFF(bits, bit))
               return bit;
     }
     return 0;
}
422 
423 /* ANYHIT: Return 1 if any bit is set. */
424 
anyhit(bits)425 int anyhit(bits)
426 unsigned long *bits;
427 {
428      int i;
429      for (i = 0; i < grplongs; i++)
430 	  if (bits[i] != 0)
431 	       return 1;
432      return 0;
433 }
434 
435 /*
436 Local Variables:
437 c-indent-level: 5
438 c-continued-statement-offset: 5
439 c-brace-offset: -5
440 c-argdecl-indent: 0
441 c-label-offset: -5
442 comment-column: 30
443 End:
444 */
445