1 // Copyright (c) 1994 James Clark
2 // See the file COPYING for copying permission.
3 
4 #ifdef __GNUG__
5 #pragma implementation
6 #endif
7 #include "splib.h"
8 #include <stdlib.h>
9 #include "ContentToken.h"
10 #include "macros.h"
11 #include "ElementType.h"
12 #include "Vector.h"
13 #include "Dtd.h"
14 #include "MessageArg.h"
15 
16 #ifdef SP_NAMESPACE
17 namespace SP_NAMESPACE {
18 #endif
19 
~Transition()20 Transition::~Transition() {}
21 
AndModelGroup(NCVector<Owner<ContentToken>> & v,ContentToken::OccurrenceIndicator oi)22 AndModelGroup::AndModelGroup(NCVector<Owner<ContentToken> > &v,
23 			     ContentToken::OccurrenceIndicator oi)
24 : ModelGroup(v, oi)
25 {
26 }
27 
connector() const28 ModelGroup::Connector AndModelGroup::connector() const
29 {
30   return andConnector;
31 }
32 
OrModelGroup(NCVector<Owner<ContentToken>> & v,ContentToken::OccurrenceIndicator oi)33 OrModelGroup::OrModelGroup(NCVector<Owner<ContentToken> > &v,
34 			   ContentToken::OccurrenceIndicator oi)
35 : ModelGroup(v, oi)
36 {
37   setOrGroup();
38 }
39 
connector() const40 ModelGroup::Connector OrModelGroup::connector() const
41 {
42   return orConnector;
43 }
44 
45 
SeqModelGroup(NCVector<Owner<ContentToken>> & v,ContentToken::OccurrenceIndicator oi)46 SeqModelGroup::SeqModelGroup(NCVector<Owner<ContentToken> > &v,
47 			     ContentToken::OccurrenceIndicator oi)
48 : ModelGroup(v, oi)
49 {
50 }
51 
connector() const52 ModelGroup::Connector SeqModelGroup::connector() const
53 {
54   return seqConnector;
55 }
56 
57 
ModelGroup(NCVector<Owner<ContentToken>> & v,OccurrenceIndicator oi)58 ModelGroup::ModelGroup(NCVector<Owner<ContentToken> > &v,
59 		       OccurrenceIndicator oi)
60 : ContentToken(oi)
61 {
62   members_.swap(v);
63 }
64 
grpgtcnt() const65 unsigned long ModelGroup::grpgtcnt() const
66 {
67   unsigned long cnt = 1;
68   for (size_t i = 0; i < members_.size(); i++)
69     cnt += members_[i]->grpgtcnt();
70   return cnt;
71 }
72 
setOrGroup()73 void ModelGroup::setOrGroup()
74 {
75   for (size_t i = 0; i < members_.size(); i++)
76     members_[i]->setOrGroupMember();
77 }
78 
asModelGroup() const79 const ModelGroup *ModelGroup::asModelGroup() const
80 {
81   return this;
82 }
83 
ElementToken(const ElementType * element,OccurrenceIndicator oi)84 ElementToken::ElementToken(const ElementType *element, OccurrenceIndicator oi)
85 : LeafContentToken(element, oi)
86 {
87 }
88 
ContentToken(OccurrenceIndicator oi)89 ContentToken::ContentToken(OccurrenceIndicator oi)
90 : occurrenceIndicator_(oi)
91 {
92 }
93 
grpgtcnt() const94 unsigned long ContentToken::grpgtcnt() const
95 {
96   return 1;
97 }
98 
setOrGroupMember()99 void ContentToken::setOrGroupMember()
100 {
101 }
102 
asModelGroup() const103 const ModelGroup *ContentToken::asModelGroup() const
104 {
105   return 0;
106 }
107 
asLeafContentToken() const108 const LeafContentToken *ContentToken::asLeafContentToken() const
109 {
110   return 0;
111 }
112 
LeafContentToken(const ElementType * element,OccurrenceIndicator oi)113 LeafContentToken::LeafContentToken(const ElementType *element,
114 				   OccurrenceIndicator oi)
115 : element_(element), ContentToken(oi), isFinal_(0), orGroupMember_(0),
116   requiredIndex_(size_t(-1))
117 {
118 }
119 
isInitial() const120 Boolean LeafContentToken::isInitial() const
121 {
122   return 0;
123 }
124 
setOrGroupMember()125 void LeafContentToken::setOrGroupMember()
126 {
127   orGroupMember_ = 1;
128 }
129 
asLeafContentToken() const130 const LeafContentToken *LeafContentToken::asLeafContentToken() const
131 {
132   return this;
133 }
134 
PcdataToken()135 PcdataToken::PcdataToken()
136 : LeafContentToken(0, rep)
137 {
138 }
139 
InitialPseudoToken()140 InitialPseudoToken::InitialPseudoToken()
141 : LeafContentToken(0, none)
142 {
143 }
144 
isInitial() const145 Boolean InitialPseudoToken::isInitial() const
146 {
147   return 1;
148 }
149 
DataTagGroup(NCVector<Owner<ContentToken>> & vec,OccurrenceIndicator oi)150 DataTagGroup::DataTagGroup(NCVector<Owner<ContentToken> > &vec,
151 			   OccurrenceIndicator oi)
152 : SeqModelGroup(vec, oi)
153 {
154 }
155 
DataTagElementToken(const ElementType * element,Vector<Text> & templates,Text & paddingTemplate)156 DataTagElementToken::DataTagElementToken(const ElementType *element,
157 					 Vector<Text> &templates,
158 					 Text &paddingTemplate)
159 : ElementToken(element, ContentToken::none),
160   havePaddingTemplate_(1)
161 {
162   templates.swap(templates_);
163   paddingTemplate.swap(paddingTemplate_);
164 }
165 
DataTagElementToken(const ElementType * element,Vector<Text> & templates)166 DataTagElementToken::DataTagElementToken(const ElementType *element,
167 					 Vector<Text> &templates)
168 : ElementToken(element, ContentToken::none),
169   havePaddingTemplate_(0)
170 {
171   templates.swap(templates_);
172 }
173 
~ContentToken()174 ContentToken::~ContentToken()
175 {
176 }
177 
178 struct GroupInfo {
179   unsigned nextLeafIndex;
180   PackedBoolean containsPcdata;
181   unsigned andStateSize;
182   Vector<unsigned> nextTypeIndex;
183   GroupInfo(size_t);
184 };
185 
186 
GroupInfo(size_t nType)187 GroupInfo::GroupInfo(size_t nType)
188 : nextTypeIndex(nType, 0), nextLeafIndex(0), containsPcdata(0), andStateSize(0)
189 {
190 }
191 
CompiledModelGroup(Owner<ModelGroup> & modelGroup)192 CompiledModelGroup::CompiledModelGroup(Owner<ModelGroup> &modelGroup)
193 : modelGroup_(modelGroup.extract())
194 {
195 }
196 
compile(size_t nElementTypeIndex,Vector<ContentModelAmbiguity> & ambiguities,Boolean & pcdataUnreachable)197 void CompiledModelGroup::compile(size_t nElementTypeIndex,
198 				 Vector<ContentModelAmbiguity> &ambiguities,
199 				 Boolean &pcdataUnreachable)
200 {
201   FirstSet first;
202   LastSet last;
203   GroupInfo info(nElementTypeIndex);
204   modelGroup_->analyze(info, 0, 0, first, last);
205   for (unsigned i = 0; i < last.size(); i++)
206     last[i]->setFinal();
207   andStateSize_ = info.andStateSize;
208   containsPcdata_ = info.containsPcdata;
209   initial_ = new InitialPseudoToken;
210   LastSet initialSet(1);
211   initialSet[0] = initial_.pointer();
212   ContentToken::addTransitions(initialSet, first, 1, 0, 0);
213   if (modelGroup_->inherentlyOptional())
214     initial_->setFinal();
215   pcdataUnreachable = 0;
216   Vector<unsigned> minAndDepth(info.nextLeafIndex);
217   Vector<size_t> elementTransition(nElementTypeIndex);
218   initial_->finish(minAndDepth, elementTransition, ambiguities,
219 		   pcdataUnreachable);
220   modelGroup_->finish(minAndDepth, elementTransition, ambiguities,
221 		      pcdataUnreachable);
222   if (!containsPcdata_)
223     pcdataUnreachable = 0;
224 }
225 
finish(Vector<unsigned> & minAndDepth,Vector<size_t> & elementTransition,Vector<ContentModelAmbiguity> & ambiguities,Boolean & pcdataUnreachable)226 void ModelGroup::finish(Vector<unsigned> &minAndDepth,
227 			Vector<size_t> &elementTransition,
228 			Vector<ContentModelAmbiguity> &ambiguities,
229 			Boolean &pcdataUnreachable)
230 {
231   for (unsigned i = 0; i < nMembers(); i++)
232     member(i).finish(minAndDepth, elementTransition, ambiguities,
233 		     pcdataUnreachable);
234 }
235 
finish(Vector<unsigned> & minAndDepthVec,Vector<size_t> & elementTransitionVec,Vector<ContentModelAmbiguity> & ambiguities,Boolean & pcdataUnreachable)236 void LeafContentToken::finish(Vector<unsigned> &minAndDepthVec,
237 			      Vector<size_t> &elementTransitionVec,
238 			      Vector<ContentModelAmbiguity> &ambiguities,
239 			      Boolean &pcdataUnreachable)
240 {
241   if (andInfo_) {
242     andFinish(minAndDepthVec, elementTransitionVec, ambiguities,
243 	      pcdataUnreachable);
244     return;
245   }
246   Vector<size_t>::iterator elementTransition = elementTransitionVec.begin();
247   Vector<unsigned>::iterator minAndDepth = minAndDepthVec.begin();
248   minAndDepthVec.assign(minAndDepthVec.size(), unsigned(-1));
249   elementTransitionVec.assign(elementTransitionVec.size(), size_t(-1));
250   pcdataTransitionType_ = 0;
251   simplePcdataTransition_ = 0;
252   // follow_ is in decreasing order of andDepth because of how it's
253   // constructed.
254   size_t n = follow_.size();
255   Vector<LeafContentToken *>::iterator follow = follow_.begin();
256   size_t j = 0;
257   for (size_t i = 0; i < n; i++) {
258     unsigned &minDepth = minAndDepth[follow[i]->index()];
259     if (minDepth) {
260       minDepth = 0;
261       if (j != i)
262 	follow[j] = follow[i];
263       if (i == requiredIndex_)
264 	requiredIndex_ = j;
265       const ElementType *e = follow[i]->elementType();
266       unsigned ei;
267       if (e == 0) {
268 	if (follow[i]->andInfo_ == 0) {
269 	  simplePcdataTransition_ = follow[i];
270 	  pcdataTransitionType_ = 1;
271 	}
272 	else
273 	  pcdataTransitionType_ = 2;
274 	ei = 0;
275       }
276       else
277 	ei = e->index();
278       if (elementTransition[ei] != size_t(-1)) {
279 	const LeafContentToken *prev = follow[elementTransition[ei]];
280 	// This might not be true: consider (a & b?)*; after the
281 	// a there are two different ways to get to the same b,
282 	// with the same and depth.
283 	if (follow[i] != prev) {
284 	  ambiguities.resize(ambiguities.size() + 1);
285 	  ContentModelAmbiguity &a = ambiguities.back();
286 	  a.from = this;
287 	  a.to1 = prev;
288 	  a.to2 = follow[i];
289 	  a.andDepth = 0;
290 	}
291       }
292       elementTransition[ei] = j;
293       j++;
294     }
295   }
296   if (pcdataTransitionType_ == 0)
297     pcdataUnreachable = 1;
298   follow_.resize(j);
299 }
300 
andFinish(Vector<unsigned> & minAndDepthVec,Vector<size_t> & elementTransitionVec,Vector<ContentModelAmbiguity> & ambiguities,Boolean & pcdataUnreachable)301 void LeafContentToken::andFinish(Vector<unsigned> &minAndDepthVec,
302 				 Vector<size_t> &elementTransitionVec,
303 				 Vector<ContentModelAmbiguity> &ambiguities,
304 				 Boolean &pcdataUnreachable)
305 {
306   // Vector mapping element type index to index of leaf content token
307   // of that type to which there is a transition, which is the "worst"
308   // from the point of view of ambiguity.
309   Vector<size_t>::iterator elementTransition = elementTransitionVec.begin();
310   // Vector mapping index of leaf content token
311   // to minimum AND depth of transition to that token.
312   Vector<unsigned>::iterator minAndDepth = minAndDepthVec.begin();
313   minAndDepthVec.assign(minAndDepthVec.size(), unsigned(-1));
314   elementTransitionVec.assign(elementTransitionVec.size(), size_t(-1));
315   pcdataTransitionType_ = 0;
316   simplePcdataTransition_ = 0;
317   unsigned pcdataMinCovered = 0;
318 
319   // follow_ is in decreasing order of andDepth because of how it's
320   // constructed.
321   size_t n = follow_.size();
322   size_t j = 0;
323   Vector<Transition>::iterator andFollow = andInfo_->follow.begin();
324   for (size_t i = 0; i < n; i++) {
325     unsigned &minDepth = minAndDepth[follow_[i]->index()];
326     // ignore transitions to the same token with the same and depth.
327     if (andFollow[i].andDepth < minDepth) {
328       minDepth = andFollow[i].andDepth;
329       if (j != i) {
330 	follow_[j] = follow_[i];
331 	andFollow[j] = andFollow[i];
332       }
333       if (i == requiredIndex_)
334 	requiredIndex_ = j;
335       const ElementType *e = follow_[i]->elementType();
336       unsigned ei;
337       if (e == 0) {
338 	if (pcdataTransitionType_ == 0) {
339 	  const AndModelGroup *andAncestor = andInfo_->andAncestor;
340 	  unsigned groupIndex = andInfo_->andGroupIndex;
341 	  do {
342 	    Boolean hasNonNull = 0;
343 	    for (unsigned k = 0; k < andAncestor->nMembers(); k++)
344 	      if (k != groupIndex
345 		  && !andAncestor->member(k).inherentlyOptional()) {
346 		hasNonNull = 1;
347 		break;
348 	      }
349 	    if (hasNonNull) {
350 	      if (minDepth <= andAncestor->andDepth())
351 		pcdataUnreachable = 1;
352 	      break;
353 	    }
354 	    groupIndex = andAncestor->andGroupIndex();
355 	    andAncestor = andAncestor->andAncestor();
356 	  } while (andAncestor);
357 	  if (andFollow[i].isolated)
358 	    pcdataMinCovered = minDepth;
359 	  pcdataTransitionType_ = 2;
360 	}
361 	else {
362 	  if (pcdataMinCovered > minDepth + 1)
363 	    pcdataUnreachable = 1;
364 	  pcdataMinCovered = andFollow[i].isolated ? minDepth : 0;
365 	}
366 	ei = 0;
367       }
368       else
369 	ei = e->index();
370       // If we have transitions t1, t2, ... tN to tokens having
371       // the same element type, with
372       // and-depths d1, d2, ... dN, where d1 >= d2 >= ... >= dN,
373       // then there is an ambiguity unless
374       // d1 > d2 > ... > dN and t1, t2, ... , tN-1 are all isolated.
375       size_t previ = elementTransition[ei];
376       if (previ != size_t(-1)) {
377 	const LeafContentToken *prev = follow_[previ];
378 	// This might not be true: consider (a & b?)*; after the
379 	// a there are two different ways to get to the same b,
380 	// with the same and depth.
381 	if (follow_[i] != prev
382 	    && (andFollow[previ].andDepth == andFollow[i].andDepth
383 		|| !andFollow[previ].isolated)) {
384 	  ambiguities.resize(ambiguities.size() + 1);
385 	  ContentModelAmbiguity &a = ambiguities.back();
386 	  a.from = this;
387 	  a.to1 = prev;
388 	  a.to2 = follow_[i];
389 	  a.andDepth = andFollow[i].andDepth;
390 	}
391 	if (andFollow[previ].isolated)
392 	  elementTransition[ei] = j;
393       }
394       else
395 	elementTransition[ei] = j;
396       j++;
397     }
398   }
399   if (pcdataMinCovered > 0 || pcdataTransitionType_ == 0)
400     pcdataUnreachable = 1;
401   follow_.resize(j);
402   andInfo_->follow.resize(j);
403 }
404 
analyze(GroupInfo & info,const AndModelGroup * andAncestor,unsigned andGroupIndex,FirstSet & first,LastSet & last)405 void ContentToken::analyze(GroupInfo &info,
406 			   const AndModelGroup *andAncestor,
407 			   unsigned andGroupIndex,
408 			   FirstSet &first,
409 			   LastSet &last)
410 {
411   analyze1(info, andAncestor, andGroupIndex, first, last);
412   if (occurrenceIndicator_ & opt)
413     inherentlyOptional_ = 1;
414   if (inherentlyOptional_)
415     first.setNotRequired();
416   if (occurrenceIndicator_ & plus)
417     addTransitions(last, first, 0,
418 		   andIndex(andAncestor), andDepth(andAncestor));
419 }
420 
analyze1(GroupInfo & info,const AndModelGroup * andAncestor,unsigned andGroupIndex,FirstSet & first,LastSet & last)421 void LeafContentToken::analyze1(GroupInfo &info,
422 				const AndModelGroup *andAncestor,
423 				unsigned andGroupIndex,
424 				FirstSet &first,
425 				LastSet &last)
426 {
427   leafIndex_ = info.nextLeafIndex++;
428   typeIndex_ = info.nextTypeIndex[element_ ? element_->index() : 0]++;
429   if (andAncestor) {
430     andInfo_ = new AndInfo;
431     andInfo_->andAncestor = andAncestor;
432     andInfo_->andGroupIndex = andGroupIndex;
433   }
434   first.init(this);
435   last.assign(1, this);
436   inherentlyOptional_ = 0;
437 }
438 
analyze1(GroupInfo & info,const AndModelGroup * andAncestor,unsigned andGroupIndex,FirstSet & first,LastSet & last)439 void PcdataToken::analyze1(GroupInfo &info,
440 			   const AndModelGroup *andAncestor,
441 			   unsigned andGroupIndex,
442 			   FirstSet &first,
443 			   LastSet &last)
444 {
445   info.containsPcdata = 1;
446   LeafContentToken::analyze1(info, andAncestor, andGroupIndex, first, last);
447 }
448 
analyze1(GroupInfo & info,const AndModelGroup * andAncestor,unsigned andGroupIndex,FirstSet & first,LastSet & last)449 void OrModelGroup::analyze1(GroupInfo &info,
450 			    const AndModelGroup *andAncestor,
451 			    unsigned andGroupIndex,
452 			    FirstSet &first,
453 			    LastSet &last)
454 {
455   member(0).analyze(info, andAncestor, andGroupIndex, first, last);
456   first.setNotRequired();
457   inherentlyOptional_ = member(0).inherentlyOptional();
458   for (unsigned i = 1; i < nMembers(); i++) {
459     FirstSet tempFirst;
460     LastSet tempLast;
461     member(i).analyze(info, andAncestor, andGroupIndex, tempFirst, tempLast);
462     first.append(tempFirst);
463     first.setNotRequired();
464     last.append(tempLast);
465     inherentlyOptional_ |= member(i).inherentlyOptional();
466   }
467 }
468 
analyze1(GroupInfo & info,const AndModelGroup * andAncestor,unsigned andGroupIndex,FirstSet & first,LastSet & last)469 void SeqModelGroup::analyze1(GroupInfo &info,
470 			     const AndModelGroup *andAncestor,
471 			     unsigned andGroupIndex,
472 			     FirstSet &first,
473 			     LastSet &last)
474 {
475   member(0).analyze(info, andAncestor, andGroupIndex, first, last);
476   inherentlyOptional_ = member(0).inherentlyOptional();
477   for (unsigned i = 1; i < nMembers(); i++) {
478     FirstSet tempFirst;
479     LastSet tempLast;
480     member(i).analyze(info, andAncestor, andGroupIndex, tempFirst, tempLast);
481     addTransitions(last, tempFirst, 1,
482 		   andIndex(andAncestor), andDepth(andAncestor));
483     if (inherentlyOptional_)
484       first.append(tempFirst);
485     if (member(i).inherentlyOptional())
486       last.append(tempLast);
487     else
488       tempLast.swap(last);
489     inherentlyOptional_ &= member(i).inherentlyOptional();
490   }
491 }
492 
analyze1(GroupInfo & info,const AndModelGroup * andAncestor,unsigned andGroupIndex,FirstSet & first,LastSet & last)493 void AndModelGroup::analyze1(GroupInfo &info,
494 			     const AndModelGroup *andAncestor,
495 			     unsigned andGroupIndex,
496 			     FirstSet &first,
497 			     LastSet &last)
498 {
499   andDepth_ = ContentToken::andDepth(andAncestor);
500   andIndex_ = ContentToken::andIndex(andAncestor);
501   andAncestor_ = andAncestor;
502   andGroupIndex_ = andGroupIndex;
503   if (andIndex_ + nMembers() > info.andStateSize)
504     info.andStateSize = andIndex_ + nMembers();
505   Vector<FirstSet> firstVec(nMembers());
506   Vector<LastSet> lastVec(nMembers());
507   member(0).analyze(info, this, 0, firstVec[0], lastVec[0]);
508   first = firstVec[0];
509   first.setNotRequired();
510   last = lastVec[0];
511   inherentlyOptional_ = member(0).inherentlyOptional();
512   unsigned i;
513   for (i = 1; i < nMembers(); i++) {
514     member(i).analyze(info, this, i, firstVec[i], lastVec[i]);
515     first.append(firstVec[i]);
516     first.setNotRequired();
517     last.append(lastVec[i]);
518     inherentlyOptional_ &= member(i).inherentlyOptional();
519   }
520   for (i = 0; i < nMembers(); i++) {
521     for (unsigned j = 0; j < nMembers(); j++)
522       if (j != i)
523 	addTransitions(lastVec[i], firstVec[j], 0,
524 		       andIndex() + nMembers(),
525 		       andDepth() + 1,
526 		       !member(j).inherentlyOptional(),
527 		       andIndex() + j, andIndex() + i);
528   }
529 }
530 
addTransitions(const LastSet & from,const FirstSet & to,Boolean maybeRequired,unsigned andClearIndex,unsigned andDepth,Boolean isolated,unsigned requireClear,unsigned toSet)531 void ContentToken::addTransitions(const LastSet &from,
532 				  const FirstSet &to,
533 				  Boolean maybeRequired,
534 				  unsigned andClearIndex,
535 				  unsigned andDepth,
536 				  Boolean isolated,
537 				  unsigned requireClear,
538 				  unsigned toSet)
539 {
540   size_t length = from.size();
541   for (unsigned i = 0; i < length; i++)
542     from[i]->addTransitions(to,
543 			    maybeRequired,
544 			    andClearIndex,
545 			    andDepth,
546 			    isolated,
547 			    requireClear,
548 			    toSet);
549 }
550 
addTransitions(const FirstSet & to,Boolean maybeRequired,unsigned andClearIndex,unsigned andDepth,Boolean isolated,unsigned requireClear,unsigned toSet)551 void LeafContentToken::addTransitions(const FirstSet &to,
552 				      Boolean maybeRequired,
553 				      unsigned andClearIndex,
554 				      unsigned andDepth,
555 				      Boolean isolated,
556 				      unsigned requireClear,
557 				      unsigned toSet)
558 {
559   if (maybeRequired && to.requiredIndex() != size_t(-1)) {
560     ASSERT(requiredIndex_ == size_t(-1));
561     requiredIndex_ = to.requiredIndex() + follow_.size();
562   }
563   size_t length = follow_.size();
564   size_t n = to.size();
565   follow_.resize(length + n);
566   for (size_t i = 0; i < n; i++)
567     follow_[length + i] = to.token(i);
568   if (andInfo_) {
569     andInfo_->follow.resize(length + n);
570     for (size_t i = 0; i < n; i++) {
571       Transition &t = andInfo_->follow[length + i];
572       t.clearAndStateStartIndex = andClearIndex;
573       t.andDepth = andDepth;
574       t.isolated = isolated;
575       t.requireClear = requireClear;
576       t.toSet = toSet;
577     }
578   }
579 }
580 
AndState(unsigned n)581 AndState::AndState(unsigned n)
582 : v_(n, PackedBoolean(0)), clearFrom_(0)
583 {
584 }
585 
clearFrom1(unsigned i)586 void AndState::clearFrom1(unsigned i)
587 {
588   while (clearFrom_ > i)
589     v_[--clearFrom_] = 0;
590 }
591 
MatchState()592 MatchState::MatchState()
593 : andState_(0)
594 {
595 }
596 
MatchState(const CompiledModelGroup * model)597 MatchState::MatchState(const CompiledModelGroup *model)
598 : pos_(model ? model->initial() : 0),
599   andState_(model ? model->andStateSize() : 0),
600   minAndDepth_(0)
601 {
602 }
603 
invalidExclusion(const ElementType * e) const604 const LeafContentToken *MatchState::invalidExclusion(const ElementType *e)
605      const
606 {
607   const LeafContentToken *token = pos_->transitionToken(e, andState_,
608 							minAndDepth_);
609   if (token && !token->inherentlyOptional() && !token->orGroupMember())
610     return token;
611   else
612     return 0;
613 }
614 
operator ==(const MatchState & state) const615 Boolean MatchState::operator==(const MatchState &state) const
616 {
617   return (pos_ == state.pos_ && andState_ == state.andState_
618 	  && minAndDepth_ == state.minAndDepth_);
619 }
620 
operator ==(const AndState & state) const621 Boolean AndState::operator==(const AndState &state) const
622 {
623   ASSERT(v_.size() == state.v_.size());
624   for (size_t i = 0; i < v_.size(); i++) {
625     if (i >= clearFrom_ && i >= state.clearFrom_)
626       break;
627     if (v_[i] != state.v_[i])
628       return 0;
629   }
630   return 1;
631 }
632 
633 const LeafContentToken *
transitionToken(const ElementType * to,const AndState & andState,unsigned minAndDepth) const634 LeafContentToken::transitionToken(const ElementType *to,
635 				  const AndState &andState,
636 				  unsigned minAndDepth) const
637 {
638   Vector<LeafContentToken *>::const_iterator p = follow_.begin();
639   if (!andInfo_) {
640     for (size_t n = follow_.size(); n > 0; n--, p++)
641       if ((*p)->elementType() == to)
642 	return *p;
643   }
644   else {
645     Vector<Transition>::const_iterator q = andInfo_->follow.begin();
646     for (size_t n = follow_.size(); n > 0; n--, p++, q++)
647       if ((*p)->elementType() == to
648 	  && ((q->requireClear == unsigned(Transition::invalidIndex)
649 	       || andState.isClear(q->requireClear))
650 	      && q->andDepth >= minAndDepth))
651 	return (*p);
652   }
653   return 0;
654 }
655 
656 Boolean
tryTransition(const ElementType * to,AndState & andState,unsigned & minAndDepth,const LeafContentToken * & newpos) const657 LeafContentToken::tryTransition(const ElementType *to,
658 				AndState &andState,
659 				unsigned &minAndDepth,
660 				const LeafContentToken *&newpos) const
661 {
662   Vector<LeafContentToken *>::const_iterator p = follow_.begin();
663   if (!andInfo_) {
664     for (size_t n = follow_.size(); n > 0; n--, p++) {
665       if ((*p)->elementType() == to) {
666 	newpos = *p;
667 	minAndDepth = newpos->computeMinAndDepth(andState);
668 	return 1;
669       }
670     }
671   }
672   else {
673     Vector<Transition>::const_iterator q = andInfo_->follow.begin();
674     for (size_t n = follow_.size(); n > 0; n--, p++, q++) {
675     if ((*p)->elementType() == to
676 	&& ((q->requireClear == unsigned(Transition::invalidIndex)
677 	     || andState.isClear(q->requireClear))
678 	    && q->andDepth >= minAndDepth)) {
679 	if (q->toSet != unsigned(Transition::invalidIndex))
680 	  andState.set(q->toSet);
681 	andState.clearFrom(q->clearAndStateStartIndex);
682 	newpos = *p;
683 	minAndDepth = newpos->computeMinAndDepth(andState);
684 	return 1;
685       }
686     }
687   }
688   return 0;
689 }
690 
691 void
possibleTransitions(const AndState & andState,unsigned minAndDepth,Vector<const ElementType * > & v) const692 LeafContentToken::possibleTransitions(const AndState &andState,
693 				      unsigned minAndDepth,
694 				      Vector<const ElementType *> &v) const
695 {
696   Vector<LeafContentToken *>::const_iterator p = follow_.begin();
697   if (!andInfo_) {
698     for (size_t n = follow_.size(); n > 0; n--, p++)
699       v.push_back((*p)->elementType());
700   }
701   else {
702     Vector<Transition>::const_iterator q = andInfo_->follow.begin();
703     for (size_t n = follow_.size(); n > 0; n--, p++, q++)
704       if ((q->requireClear == unsigned(Transition::invalidIndex)
705 	   || andState.isClear(q->requireClear))
706 	&& q->andDepth >= minAndDepth)
707 	v.push_back((*p)->elementType());
708   }
709 }
710 
computeMinAndDepth1(const AndState & andState) const711 unsigned LeafContentToken::computeMinAndDepth1(const AndState &andState) const
712 {
713   ASSERT(andInfo_ != 0);
714   unsigned groupIndex = andInfo_->andGroupIndex;
715   for (const AndModelGroup *group = andInfo_->andAncestor;
716        group;
717        groupIndex = group->andGroupIndex(), group = group->andAncestor())
718     for (unsigned i = 0; i < group->nMembers(); i++)
719       if (i != groupIndex && !group->member(i).inherentlyOptional()
720 	  && andState.isClear(group->andIndex() + i))
721 	return group->andDepth() + 1;
722   return 0;
723 }
724 
725 const LeafContentToken *
impliedStartTag(const AndState & andState,unsigned minAndDepth) const726 LeafContentToken::impliedStartTag(const AndState &andState,
727 				  unsigned minAndDepth) const
728 {
729   if (requiredIndex_ != size_t(-1)) {
730     if (!andInfo_)
731       return follow_[requiredIndex_];
732     const Transition &t = andInfo_->follow[requiredIndex_];
733     if ((t.requireClear == unsigned(Transition::invalidIndex)
734 	 || andState.isClear(t.requireClear))
735 	&& t.andDepth >= minAndDepth)
736       return follow_[requiredIndex_];
737   }
738   return 0;
739 }
740 
doRequiredTransition(AndState & andState,unsigned & minAndDepth,const LeafContentToken * & newpos) const741 void LeafContentToken::doRequiredTransition(AndState &andState,
742 					    unsigned &minAndDepth,
743 					    const LeafContentToken *&newpos)
744      const
745 {
746   ASSERT(requiredIndex_ != size_t(-1));
747   if (andInfo_) {
748     const Transition &t = andInfo_->follow[requiredIndex_];
749     if (t.toSet != unsigned(Transition::invalidIndex))
750       andState.set(t.toSet);
751     andState.clearFrom(t.clearAndStateStartIndex);
752   }
753   newpos = follow_[requiredIndex_];
754   minAndDepth = newpos->computeMinAndDepth(andState);
755 }
756 
FirstSet()757 FirstSet::FirstSet()
758 : requiredIndex_(size_t(-1))
759 {
760 }
761 
init(LeafContentToken * p)762 void FirstSet::init(LeafContentToken *p)
763 {
764   v_.assign(1, p);
765   v_.reserve(256);
766   requiredIndex_ = 0;
767 }
768 
append(const FirstSet & set)769 void FirstSet::append(const FirstSet &set)
770 {
771   if (set.requiredIndex_ != size_t(-1)) {
772     ASSERT(requiredIndex_ == size_t(-1));
773     requiredIndex_ = set.requiredIndex_ + v_.size();
774   }
775   size_t oldSize = v_.size();
776   v_.resize(v_.size() + set.v_.size());
777   for (size_t i = 0; i < set.v_.size(); i++)
778     v_[oldSize + i] = set.v_[i];
779 }
780 
append(const LastSet & set)781 void LastSet::append(const LastSet &set)
782 {
783   size_t oldSize = size();
784   resize(size() + set.size());
785   for (size_t i = 0; i < set.size(); i++)
786     (*this)[oldSize + i] = set[i];
787 }
788 
789 #ifdef SP_NAMESPACE
790 }
791 #endif
792