1 #include "sgmlincl.h" /* #INCLUDE statements for SGML parser. */
2 #include "context.h"
3
#define GI    (tags[ts].tetd->etdgi+1)    /* GI of current element. */
#define NEWGI (newetd->etdgi+1)           /* GI of new tag. */
#define STATUS (*statuspt)     /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
#define PEX (-1)               /* GI is a plus exception and not a minus. */

/* Hit-bit helpers.  "h" is an array of unsigned long words and "n" is a
   1-based bit number: bit n lives in word (n-1)>>LONGPOW at position
   (n-1)&(LONGBITS-1).  LONGPOW and LONGBITS are supplied by the included
   headers.

   Every macro argument is parenthesized so that an expression argument
   (for example "i & mask") is evaluated as a unit, and the mask is built
   from an unsigned long 1 so that selecting the top bit of a word is well
   defined (the hit words themselves are unsigned long).
*/
/* ANYHIT: nonzero if any word of h is nonzero (single-word fast path). */
#define ANYHIT(h) (grplongs == 1 ? ((h)[0] != 0) : anyhit(h))
/* HITSET: turn on bit n of h; the value is the updated word. */
#define HITSET(h, n) ((h)[(unsigned)((n)-1)>>LONGPOW] \
                      |= ((unsigned long)1<<(((n)-1)&(LONGBITS-1))))
/* HITON: nonzero if bit n of h is on. */
#define HITON(h, n) ((h)[(unsigned)((n)-1)>>LONGPOW] \
                     & ((unsigned long)1<<(((n)-1)&(LONGBITS-1))))

/* HITOFF: 1 if bit n of h is off; otherwise 0. */
#define HITOFF(h, n) (!(HITON(h, n)))

/* TOKENHIT: nonzero if the hit bit for token T is on in H. */
#define TOKENHIT HITON(H,T)
17
18 static
copypos(to,from)19 VOID copypos(to, from)
20 struct mpos *to, *from;
21 {
22 int i;
23 for (i = 0; i <= (int)from[0].t; i++) {
24 to[i].g = from[i].g;
25 to[i].t = from[i].t;
26 memcpy(to[i].h, from[i].h, grplongs*sizeof(unsigned long));
27 }
28 }
29
30 /* CONTEXT: Determine whether a GI is valid in the present structural context.
31 Returns RCHIT if valid, RCEND if element has ended, RCREQ if a
32 different element is required, and RCMISS if it is totally invalid.
33 On entry, pos points to the model token to be tested against the GI.
34 TO DO: Save allowed GIs for an error message on an RCMISS.
35 Support a "query" mode (what is allowed now?) by working
36 with a copy of pos.
37 */
context(gi,mod,pos,statuspt,mexts)38 int context(gi, mod, pos, statuspt, mexts)
39 struct etd *gi; /* ETD of new GI. */
40 struct thdr mod[]; /* Model of current open element. */
41 struct mpos pos[]; /* Position in open element's model. */
42 UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
43 int mexts; /* >0=stack level of minus grp; -1=plus; 0=none.*/
44 {
45 UNCH toccsv, gtypesv; /* Save token's TOCC and GTYPE in case grp ends.*/
46
47 if (mexts == -1) {
48 if (STATUS == RCEND)
49 return RCPEX;
50 copypos(savedpos, pos);
51 }
52 Tstart = T; /* Save starting token for AND group testing. */
53 while (STATUS!=RCMISS && STATUS!=RCEND) {
54 TRACEGI("CONTEXT", gi, mod, pos, Tstart);
55 while (TTYPE==TTOR || TTYPE==TTSEQ || TTYPE==TTAND) {
56 pos[P+1].g = M++; pos[++P].t = 1; HITCLEAR(H);
57 Tstart = T; /* Save starting token for AND group testing. */
58 TRACEGI("OPENGRP", gi, mod, pos, Tstart);
59 }
60 STATUS = (UNCH)tokenreq(gi, mod, pos);
61 TRACEGI("STATUS", gi, mod, pos, Tstart);
62 if (gi==TOKEN.tu.thetd) { /* Hit in model. */
63 STATUS = (UNCH)RCHIT;
64 gtypesv = GTYPE; toccsv = TOCC;
65 newtoken(mod, pos, statuspt);
66 return(mexts<=0 ? RCHIT : (gtypesv==TTOR || BITON(toccsv, TOPT))
67 ? RCMEX : RCHITMEX);
68 }
69 if (STATUS==RCREQ) {
70 if (mexts == -1)
71 break;
72 STATUS = RCHIT;
73 nextetd = TOKEN.tu.thetd;
74 newtoken(mod, pos, statuspt);
75 return(RCREQ);
76 }
77 /* else if (STATUS==RCNREQ) */
78 if (mexts>0) return(RCMEX);
79 newtoken(mod, pos, statuspt);
80 }
81 if (mexts == -1) {
82 copypos(pos, savedpos);
83 return STATUS = RCPEX;
84 }
85 return((int)STATUS);
86 }
87 /* ECONTEXT: Determine whether the current element can be ended, or whether
88 non-optional tokens remain at the current level or higher.
89 Returns 1 if element can be ended, or 0 if tokens remain.
90 On entry, STATUS==RCEND if there are no tokens left; if not,
91 pos points to the next model token to be tested.
92 TO DO: Support a "query" mode (what is required now?) by working
93 with a copy of pos.
94 */
econtext(mod,pos,statuspt)95 int econtext(mod, pos, statuspt)
96 struct thdr mod[]; /* Model of current open element. */
97 struct mpos pos[]; /* Position in open element's model. */
98 UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
99 {
100 unsigned next; /* Position in AND group of next testable token.*/
101
102 Tstart = T;
103 TRACEEND("ECONT", mod, pos, 0, 0, Tstart);
104 if (P<=1) {nextetd = 0; return(TOKENHIT || BITON(TOCC, TOPT));}
105 nextetd = TTYPE == TTETD ? TOKEN.tu.thetd : 0;
106 while (STATUS!=RCMISS && STATUS!=RCEND) {
107 STATUS = (UNCH)testend(mod, pos, 0, 0);
108 TRACEEND("ECONTEND", mod, pos, 0, 0, Tstart);
109 nextetd = P<=1 || TTYPE != TTETD ? 0 : TOKEN.tu.thetd;
110 if (STATUS==RCEND) return(1);
111 if (P<=1) return(TOKENHIT || BITON(TOCC, TOPT));
112 if (STATUS==RCMISS) {
113 if (BITON(TOCC, TOPT)) nextetd = 0;
114 return(0);
115 }
116 if (!tokenopt(mod, pos)) return(0);
117
118 STATUS = RCNREQ;
119 if (GTYPE!=TTAND) ++T; /* T!=GNUM or group would have ended. */
120 else T = (UNCH)(((next = (UNS)offbit(H, (int)T, GNUM))!=0) ?
121 next : offbit(H, 0, GNUM));
122
123 M = G + grpsz(&GHDR, (int)T-1) + 1;
124 TRACEEND("ECONTNEW", mod, pos, 0, 0, Tstart);
125 }
126 if (STATUS==RCMISS) {
127 if (BITON(TOCC, TOPT)) nextetd = 0;
128 return(0);
129 }
130 return(1); /* STATUS==RCEND */
131 }
132 /* NEWTOKEN: Find the next token to test. Set STATUS to indicate results:
133 RCEND if element has ended (no more tokens to test);
134 RCREQ if required new token was found;
135 RCNREQ if non-required new token was found;
136 RCHIT if a hit token was repeated (now non-required);
137 and RCMISS if a new token can't be found because current token
138 (which was not hit) was neither unconditionally required nor
139 optional.
140 */
newtoken(mod,pos,statuspt)141 VOID newtoken(mod, pos, statuspt)
142 struct thdr mod[]; /* Model of current open element. */
143 struct mpos pos[]; /* Position in open element's model. */
144 UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/
145 {
146 unsigned nextand = 0; /* Position in AND group of next testable token.*/
147 int currhit = (STATUS==RCHIT); /* 1=current GI hit; 0=not. */
148
149 /* If the GI was a hit, turn on the hit bit and set the status to
150 assume that the token to be tested against the next GI will
151 be non-required. If the current token is repeatable, exit so
152 it will stand as the next token to test.
153 */
154 if (STATUS==RCHIT) {
155 HITSET(H, T);
156 STATUS = RCNREQ;
157 if (BITON(TOCC, TREP)) return;
158 }
159 /* At this point, we must determine the next token to test:
160 either against the next GI, if this one was a hit, or
161 against the same GI if conditions permit a retry.
162 To find the next token, we must first end the current group,
163 if possible, and any we can that contain it.
164 If the outermost group was a hit and is repeatable, or
165 if the element has ended, we exit now.
166 If it hasn't ended, or was optional and ended with a miss,
167 we can retry the GI against the next token.
168 */
169 if ((STATUS = (UNCH)testend(mod, pos, 1, 1))!=RCNREQ) return;
170
171 /* At this point, the "current token" is either the original one,
172 or the token for the highest level unhit group that it ended.
173 We will retry a missed GI, by testing it against the next
174 token, if the current token:
175 1. Is optional;
176 2. Was hit (i.e., because it is repeatable and was hit by a
177 previous GI or because it is a hit group that just ended);
178 3. Is in an AND or OR group and is not the last testable token.
179
180 It will be the next sequential one (unhit one, in an AND group);
181 if there are none left, use the first unhit token in the group.
182 In either case, set M to correspond to the new T.
183 */
184 retest:
185 TRACEEND("RETEST", mod, pos, (int)nextand, 1, Tstart);
186 if (GTYPE==TTAND) {
187 nextand = offbit(H, (int)T, GNUM);
188 if (!nextand)
189 nextand = offbit(H, 0, GNUM);
190 }
191 if ( BITON(TOCC, TOPT)
192 || TOKENHIT
193 || GTYPE==TTOR /* T!=GNUM or group would have ended. */
194 || nextand ) {
195 if (GTYPE!=TTAND) ++T; /* T!=GNUM or group would have ended. */
196 else T = nextand;
197 M = G + grpsz(&GHDR, (int)T-1) + 1;
198 if (GTYPE==TTAND) {
199 /* If AND group wrapped, it can end if all non-optionals were
200 hit. */
201 if (T==Tstart && !currhit) {
202 UNCH Psave = P;
203 int rc = testend(mod, pos, 0, 1);
204 if (Psave!=P) {if ((STATUS = (UNCH)rc)==RCNREQ) goto retest;}
205 else STATUS = RCMISS;
206 }
207
208 /* We only test unhit tokens, so we must use an unhit token
209 as Tstart (which is used to detect when the AND group has
210 wrapped). */
211 else if (HITON(H,Tstart)) Tstart = T;
212 }
213 }
214 else STATUS = RCMISS;
215 TRACEEND("NEWTOKEN", mod, pos, (int)nextand, 1, Tstart);
216 }
217 /* TESTEND: End the current group, if possible, and any that it is nested in.
218 The current token will either be a group header, or some token
219 that could not end its group. Return 1 if the (possibly new)
220 current token is repeatable; 0 if it is not.
221 */
testend(mod,pos,andoptsw,newtknsw)222 int testend(mod, pos, andoptsw, newtknsw)
223 struct thdr mod[]; /* Model of current open element. */
224 struct mpos pos[]; /* Position in open element's model. */
225 int andoptsw; /* 1=test optional AND members; 0=ignore. */
226 int newtknsw; /* 1=new token test; 0=end element test. */
227 {
228 int rc = 0; /* Return code: RCNREQ RCHIT RCMISS RCEND */
229
230 while (!rc) {
231 TRACEEND("TRACEEND", mod, pos, rc, andoptsw, Tstart);
232 /* TESTMISS:
233 If we've hit no tokens yet in the current group, and
234 the current token is the last unhit one in the group we can test,
235 we will end the group (it may never really have started!)
236 because we might be able to try the token that follows it.
237 In any group, a token is the last testable unhit token if it
238 is the last sequential one, as the GI was already tested against
239 the preceding unhit tokens. In addition,
240 in a SEQ group, it is the last testable unhit token if it isn't
241 optional, because we can't skip past it to the following ones.
242 If we end the group, before popping the level, set M to G, as this
243 level`s group header will be the next level's current token.
244 */
245 if (!ANYHIT(H) && (T==GNUM
246 || (GTYPE==TTSEQ && BITOFF(TOCC, TOPT)))) {
247 M = G; --P; Tstart = T;
248 if (P<=1) {
249 if (BITON(TOCC, TOPT) || TOKENHIT) rc = RCEND;
250 else rc = RCMISS;
251 }
252 continue;
253 }
254 /* TESTHIT:
255 See if we've hit all the non-optional tokens in the group.
256 If so, pop to the previous level and set the group's hit bit.
257 If we were called from NEWTOKEN we are trying to find the token
258 to test against the next start-tag, so if the group is repeatable,
259 process it again. (If not, we were called from ECONTEXT and
260 are testing whether the element can be ended.)
261 Otherwise, if we are at the first level, the element is over.
262 */
263 if ((GTYPE==TTOR && TOKENHIT)
264 || (GTYPE==TTSEQ && T==(UNCH)GNUM
265 && (TOKENHIT || BITON(TOCC, TOPT)))
266 || (GTYPE==TTAND && allhit(&GHDR, H, 0, andoptsw))) {
267 M = G;
268 --P;
269 HITSET(H, T);
270 Tstart = T;
271 if (newtknsw && BITON(TOCC, TREP)) rc = RCHIT;
272 else if (P<=1) rc = RCEND;
273 /* If we are looking for a new token to test against the next
274 start-tag, then we need to consider optional and members
275 in this group, even if we didn't need to consider them
276 in the group that we just ended because that group had
277 wrapped. */
278 else if (newtknsw) andoptsw = 1;
279 /* Else loop to test new outer group. */
280 }
281 else rc = RCNREQ; /* No group ended this time, so return. */
282 }
283 TRACEEND("ENDFOUND", mod, pos, rc, andoptsw, Tstart);
284 return(rc);
285 }
286 /* TOKENOPT: Return 1 if current token is contextually optional;
287 otherwise, return 0.
288 */
tokenopt(mod,pos)289 int tokenopt(mod, pos)
290 struct thdr mod[]; /* Model of current open element. */
291 struct mpos pos[]; /* Position in open element's model. */
292 {
293 TRACEEND("TOKENOPT", mod, pos, 0, 0, Tstart);
294 return (BITON(TOCC, TOPT) /* Inherently optional. */
295 || TOKENHIT /* Was hit (handles "plus" suffix case). */
296 || (!ANYHIT(H) && groupopt(mod, pos)));
297 /* In optional group with no hits. */
298 }
299 /* GROUPOPT: Temporarily makes the current group be the current token so that
300 TOKENOPT() can be applied to it. Returns the value returned
301 by TOKENOPT.
302 */
groupopt(mod,pos)303 int groupopt(mod, pos)
304 struct thdr mod[]; /* Model of current open element. */
305 struct mpos pos[]; /* Position in open element's model. */
306 {
307 UNCH saveM; /* Save M when testing if group is not required.*/
308 int rc; /* 1=contextually optional; 0=not. */
309
310 if (P==1) return(BITON(GOCC, TOPT) || TOKENHIT);
311 saveM = M; M = G; --P;
312 rc = tokenopt(mod, pos);
313 ++P; G = M; M = saveM;
314 return(rc);
315 }
316 /* TOKENREQ: Returns RCREQ if the current token is "contextually required".
317 That is, it is not contextually optional and
318 1) it is a member of a "seq" group that is either required
319 or has at least 1 hit token.
320 2) it is a member of an "and" group in which all other
321 tokens were hit.
322 Optional tokens are not counted
323 if GI is ETDCDATA, as we are looking for an
324 omitted start-tag. Otherwise, they are counted,
325 as the GI might match one of them.
326 Returns RCNREQ if the current token is "not required".
327 */
tokenreq(gi,mod,pos)328 int tokenreq(gi, mod, pos)
329 struct etd *gi; /* ETD of new GI. */
330 struct thdr mod[]; /* Model of current open element. */
331 struct mpos pos[]; /* Position in open element's model. */
332 {
333 TRACEGI("TOKENREQ", gi, mod, pos, Tstart);
334 return( tokenopt(mod, pos) ? RCNREQ
335 : ( GTYPE==TTSEQ && (ANYHIT(H) || groupreq(gi, mod, pos)==RCREQ)
336 #if 0
337 || (GTYPE==TTAND && allhit(&GHDR, H, T, \*gi!=ETDCDATA*\ 1))
338 #endif
339 )
340 ? RCREQ : RCNREQ );
341 }
342 /* GROUPREQ: Temporarily makes the current group be the current token so that
343 TOKENREQ() can be applied to it. Returns the value returned
344 by TOKENREQ.
345 */
groupreq(gi,mod,pos)346 int groupreq(gi, mod, pos)
347 struct etd *gi; /* ETD of new GI. */
348 struct thdr mod[]; /* Model of current open element. */
349 struct mpos pos[]; /* Position in open element's model. */
350 {
351 UNCH saveM; /* Save M when testing if group is not required.*/
352 int rc; /* Return code: RCREQ RCNREQ */
353
354 if (P==1) return(BITOFF(GOCC, TOPT) ? RCREQ : RCNREQ);
355 saveM = M; M = G; --P;
356 rc = tokenreq(gi, mod, pos);
357 ++P; G = M; M = saveM;
358 return(rc);
359 }
360 /* GRPSZ: Returns the number of tokens spanned by a group in the model (M),
361 from the group's start (G) to a specified index within the group (T).
362 M = 0, plus 1 for each token in the group, plus the size of
363 any subgroups (gotten by calling GRPSZ recursively). On entry,
364 M must be equal to G at the current level.
365 */
grpsz(g,t)366 int grpsz(g, t)
367 struct thdr *g; /* mod[G]: Ptr to group in the model. */
368 int t; /* T: Index of last token in the group. */
369 {
370 struct thdr *p = g; /* Ptr to current token in the model. */
371 int m = 0; /* Size of group (including nested groups). */
372 int i = 0; /* Number of group members (loop counter). */
373 UNS type; /* Token type (without TOREP bits). */
374
375 while (++i<=t) {
376 ++p; ++m;
377 type = GET(p->ttype, TTMASK);
378 if (type==TTOR || type==TTSEQ || type==TTAND) {
379 m += grpsz(p, p->tu.tnum);
380 p = g+m;
381 }
382 }
383 return(m);
384 }
385 /* ALLHIT: Returns 1 if all hit bits for the specified group are turned on,
386 (other than those that correspond to optional tokens if "opt" is
387 0) and the "but" bit (all bits if "but" bit is zero). Otherwise,
388 returns 0. GRPSZ is used to skip past subgroup tokens.
389 */
allhit(p,hits,but,opt)390 int allhit(p, hits, but, opt)
391 struct thdr *p; /* mod[G]: Ptr to group in the model. */
392 unsigned long *hits; /* H: Hit bits to be tested. */
393 int but; /* Index of bit to ignore; 0=test all. */
394 int opt; /* 1=optional tokens must be hit; 0=ignore. */
395 {
396 int b = 0; /* Index of bit being tested in hits. */
397 int e = p->tu.tnum; /* Ending index (number of bits to test). */
398 unsigned type; /* Token type (without TOREP bits). */
399
400 while (++p, ++b<=e) {
401 if (HITOFF(hits,b) && (opt || BITOFF(p->ttype,TOPT)) && b!=but)
402 return 0;
403 if ((type = GET(p->ttype,TTMASK))==TTOR || type==TTSEQ || type==TTAND)
404 p += grpsz(p, p->tu.tnum);
405 }
406 return 1;
407 }
/* OFFBIT: Returns the index of the first unset bit after (i.e., not including)
           the caller's "first" bit, searching up to and including the
           "last" bit.  If all of those bits are on (or there are none to
           search), it returns 0.
*/
int offbit(bits, first, last)
unsigned long *bits;          /* Bits to be tested. */
int first;                    /* Index of bit preceding the first one tested. */
int last;                     /* Index of last bit to be tested in bits. */
{
     int bit;                 /* Index of bit being tested. */

     for (bit = first + 1; bit <= last; ++bit) {
          if (HITOFF(bits, bit)) {
               return bit;
          }
     }
     return 0;
}
422
423 /* ANYHIT: Return 1 if any bit is set. */
424
anyhit(bits)425 int anyhit(bits)
426 unsigned long *bits;
427 {
428 int i;
429 for (i = 0; i < grplongs; i++)
430 if (bits[i] != 0)
431 return 1;
432 return 0;
433 }
434
435 /*
436 Local Variables:
437 c-indent-level: 5
438 c-continued-statement-offset: 5
439 c-brace-offset: -5
440 c-argdecl-indent: 0
441 c-label-offset: -5
442 comment-column: 30
443 End:
444 */
445