1 // Copyright (c) 1994 James Clark
2 // See the file COPYING for copying permission.
3 
4 #include "splib.h"
5 #include "Parser.h"
6 #include "Param.h"
7 #include "Group.h"
8 #include "Markup.h"
9 #include "ParserMessages.h"
10 #include "MessageArg.h"
11 #include "TokenMessageArg.h"
12 #include "token.h"
13 #include "macros.h"
14 
15 #ifdef SP_NAMESPACE
16 namespace SP_NAMESPACE {
17 #endif
18 
// Read the next parameter of a markup declaration into parm.
//
// Tokens are fetched in allow.mainMode() in a loop.  Separators, comments,
// entity ends and parameter entity references are consumed inside the loop
// and do not end the call; any other token either produces a parameter or
// is reported as an error.
//
//   allow          - the parameter kinds acceptable at this point
//   declInputLevel - an entity end seen while inputLevel() <= declInputLevel
//                    is reported (declarationLevel) and fails the call
//   parm           - receives the parameter type and its payload
//
// Returns 1 when a parameter was stored in parm, 0 on failure.
Boolean Parser::parseParam(const AllowedParams &allow,
			   unsigned declInputLevel,
			   Param &parm)
{
  for (;;) {
    Token token = getToken(allow.mainMode());
    switch (token) {
    case tokenUnrecognized:
      // A true result from reportNonSgmlCharacter() means keep scanning;
      // otherwise the offending token is reported and the call fails.
      if (reportNonSgmlCharacter())
	break;
      {
	message(ParserMessages::markupDeclarationCharacter,
		StringMessageArg(currentToken()),
		AllowedParamsMessageArg(allow, syntaxPointer()));
      }
      return 0;
    case tokenEe:
      // Entity end: only entities above declInputLevel may end here.
      if (inputLevel() <= declInputLevel) {
	message(ParserMessages::declarationLevel);
	return 0;
      }
      if (currentMarkup())
	currentMarkup()->addEntityEnd();
      popInputStack();
      break;
    case tokenCom:
      // Comment between parameters; skipped, with an optional warning.
      if (!parseComment(comMode))
	return 0;
      if (options().warnPsComment)
	message(ParserMessages::psComment);
      break;
    case tokenDso:
      if (!allow.dso()) {
	paramInvalidToken(tokenDso, allow);
	return 0;
      }
      if (currentMarkup())
	currentMarkup()->addDelim(Syntax::dDSO);
      parm.type = Param::dso;
      return 1;
    case tokenGrpo:
      // Group open: the delimiter is recorded before checking whether a
      // group is allowed; allow.group() selects the kind of group to parse.
      if (currentMarkup())
	currentMarkup()->addDelim(Syntax::dGRPO);
      switch (allow.group()) {
      case Param::invalid:
	paramInvalidToken(tokenGrpo, allow);
	return 0;
      case Param::modelGroup:
	{
	  ModelGroup *group;
	  if (!parseModelGroup(1, declInputLevel, group, grpsufMode))
	    return 0;
	  parm.type = Param::modelGroup;
	  parm.modelGroupPtr = group;
	}
	break;
      case Param::nameGroup:
	if (!parseNameGroup(declInputLevel, parm))
	  return 0;
	break;
      case Param::nameTokenGroup:
	if (!parseNameTokenGroup(declInputLevel, parm))
	  return 0;
	break;
      default:
	CANNOT_HAPPEN();
      }
      // Every successful group kind ends up with parm.type == allow.group().
      parm.type = allow.group();
      return 1;
    case tokenLita:
    case tokenLit:
      // Literal: parm.type and parm.lita are written before the literal kind
      // is validated, so they are modified even on the failure paths.
      parm.type = allow.literal();
      parm.lita = token == tokenLita;
      switch (allow.literal()) {
      case Param::invalid:
	paramInvalidToken(token, allow);
	return 0;
      case Param::minimumLiteral:
	if (!parseMinimumLiteral(parm.lita, parm.literalText))
	  return 0;
	break;
      case Param::attributeValueLiteral:
	if (!parseAttributeValueLiteral(parm.lita, parm.literalText))
	  return 0;
	break;
      case Param::tokenizedAttributeValueLiteral:
	if (!parseTokenizedAttributeValueLiteral(parm.lita, parm.literalText))
	  return 0;
	break;
      case Param::systemIdentifier:
	if (!parseSystemIdentifier(parm.lita, parm.literalText))
	  return 0;
	break;
      case Param::paramLiteral:
	if (!parseParameterLiteral(parm.lita, parm.literalText))
	  return 0;
	break;
      }
      if (currentMarkup())
	currentMarkup()->addLiteral(parm.literalText);
      return 1;
    case tokenMdc:
      if (!allow.mdc()) {
	paramInvalidToken(tokenMdc, allow);
	return 0;
      }
      // Ending the declaration at a deeper input level than declInputLevel
      // is reported, but the parameter is still accepted.
      if (inputLevel() > declInputLevel)
	message(ParserMessages::parameterEntityNotEnded);
      if (currentMarkup())
	currentMarkup()->addDelim(Syntax::dMDC);
      parm.type = Param::mdc;
      return 1;
    case tokenMinus:
      // No check against allow is made for this token.
      parm.type = Param::minus;
      if (currentMarkup())
	currentMarkup()->addDelim(Syntax::dMINUS);
      return 1;
    case tokenMinusGrpo:
      // MINUS immediately followed by GRPO: an exclusions name group.
      if (!allow.exclusions()) {
	paramInvalidToken(tokenMinusGrpo, allow);
	return 0;
      }
      if (currentMarkup()) {
	currentMarkup()->addDelim(Syntax::dMINUS);
	currentMarkup()->addDelim(Syntax::dGRPO);
      }
      parm.type = Param::exclusions;
      return parseElementNameGroup(declInputLevel, parm);
    case tokenPero:
      // No check against allow is made for this token.
      parm.type = Param::pero;
      if (currentMarkup())
	currentMarkup()->addDelim(Syntax::dPERO);
      return 1;
    case tokenPeroGrpo:
      if (!inInstance())
	message(ParserMessages::peroGrpoProlog);
      // fall through
    case tokenPeroNameStart:
      {
	// Parameter entity reference: optional warnings, then the reference
	// is parsed and the entity (if any) is referenced; the loop goes on.
	if (inInstance()) {
	  if (options().warnInstanceParamEntityRef)
	    message(ParserMessages::instanceParamEntityRef);
	}
	else {
	  if (options().warnInternalSubsetPsParamEntityRef && inputLevel() == 1)
	    message(ParserMessages::internalSubsetPsParamEntityRef);
	}
	ConstPtr<Entity> entity;
	Ptr<EntityOrigin> origin;
	if (!parseEntityReference(1, token == tokenPeroGrpo, entity, origin))
	  return 0;
	if (!entity.isNull())
	  entity->declReference(*this, origin);
      }
      break;
    case tokenPlusGrpo:
      // PLUS immediately followed by GRPO: an inclusions name group.
      if (!allow.inclusions()) {
	paramInvalidToken(tokenPlusGrpo, allow);
	return 0;
      }
      if (currentMarkup()) {
	currentMarkup()->addDelim(Syntax::dPLUS);
	currentMarkup()->addDelim(Syntax::dGRPO);
      }
      parm.type = Param::inclusions;
      return parseElementNameGroup(declInputLevel, parm);
    case tokenRni:
      if (!allow.rni()) {
	paramInvalidToken(tokenRni, allow);
	return 0;
      }
      return parseIndicatedReservedName(allow, parm);
    case tokenS:
      // Separator character; recorded in the markup and skipped.
      if (currentMarkup())
	currentMarkup()->addS(currentChar());
      break;
    case tokenNameStart:
      // allow.nameStart() says how a token starting with a name start
      // character is to be interpreted.  A value with no case below falls
      // out of the inner switch and the loop continues.
      switch (allow.nameStart()) {
      case Param::invalid:
	paramInvalidToken(tokenNameStart, allow);
	return 0;
      case Param::reservedName:
	return parseReservedName(allow, parm);
      case Param::name:
	extendNameToken(syntax().namelen(), ParserMessages::nameLength);
	parm.type = Param::name;
	getCurrentToken(syntax().generalSubstTable(), parm.token);
	if (currentMarkup())
	  currentMarkup()->addName(currentInput());
	return 1;
      case Param::entityName:
	// Same as a name, but substituted with the entity substitution table.
	extendNameToken(syntax().namelen(), ParserMessages::nameLength);
	parm.type = Param::entityName;
	getCurrentToken(syntax().entitySubstTable(), parm.token);
	if (currentMarkup())
	  currentMarkup()->addName(currentInput());
	return 1;
      case Param::paramEntityName:
	// Length limit is penamelen() rather than namelen().
	extendNameToken(syntax().penamelen(),
			ParserMessages::parameterEntityNameLength);
	parm.type = Param::paramEntityName;
	getCurrentToken(syntax().entitySubstTable(), parm.token);
	if (currentMarkup())
	  currentMarkup()->addName(currentInput());
	return 1;
      case Param::attributeValue:
	return parseAttributeValueParam(parm);
      }
      break;
    case tokenDigit:
      switch (allow.digit()) {
      case Param::invalid:
	paramInvalidToken(tokenDigit, allow);
	return 0;
      case Param::number:
	// The number is stored without any substitution table applied.
	extendNumber(syntax().namelen(), ParserMessages::numberLength);
	parm.type = Param::number;
	getCurrentToken(parm.token);
	if (currentMarkup())
	  currentMarkup()->addNumber(currentInput());
	return 1;
      case Param::attributeValue:
	return parseAttributeValueParam(parm);
      }
      break;
    case tokenLcUcNmchar:
      switch (allow.nmchar()) {
      case Param::invalid:
	paramInvalidToken(tokenLcUcNmchar, allow);
	return 0;
      case Param::attributeValue:
	return parseAttributeValueParam(parm);
      }
      break;
    default:
      CANNOT_HAPPEN();
    }
  }
}
258 
paramInvalidToken(Token token,const AllowedParams & allow)259 void Parser::paramInvalidToken(Token token, const AllowedParams &allow)
260 {
261   message(ParserMessages::paramInvalidToken,
262 	  TokenMessageArg(token, allow.mainMode(),
263 			  syntaxPointer(), sdPointer()),
264 	  AllowedParamsMessageArg(allow, syntaxPointer()));
265 }
266 
parseGroupToken(const AllowedGroupTokens & allow,unsigned nestingLevel,unsigned declInputLevel,unsigned groupInputLevel,GroupToken & gt)267 Boolean Parser::parseGroupToken(const AllowedGroupTokens &allow,
268 				unsigned nestingLevel,
269 				unsigned declInputLevel,
270 				unsigned groupInputLevel,
271 				GroupToken &gt)
272 {
273   for (;;) {
274     Token token = getToken(grpMode);
275     switch (token) {
276     case tokenEe:
277       if (inputLevel() <= groupInputLevel) {
278 	message(ParserMessages::groupLevel);
279 	if (inputLevel() <= declInputLevel)
280 	  return 0;
281       }
282       else if (!sd().www())
283 	message(ParserMessages::groupEntityEnd);
284       if (currentMarkup())
285 	currentMarkup()->addEntityEnd();
286       popInputStack();
287       break;
288     case tokenPeroGrpo:
289       if (!inInstance())
290 	message(ParserMessages::peroGrpoProlog);
291       // fall through
292     case tokenPeroNameStart:
293       {
294 	if (options().warnInternalSubsetTsParamEntityRef && inputLevel() == 1)
295 	  message(ParserMessages::internalSubsetTsParamEntityRef);
296 	ConstPtr<Entity> entity;
297 	Ptr<EntityOrigin> origin;
298 	if (!parseEntityReference(1, token == tokenPeroGrpo, entity, origin))
299 	  return 0;
300 	if (!entity.isNull())
301 	  entity->declReference(*this, origin);
302       }
303       break;
304     case tokenUnrecognized:
305       if (reportNonSgmlCharacter())
306 	break;
307       {
308 	message(ParserMessages::groupCharacter,
309 		StringMessageArg(currentToken()),
310 		AllowedGroupTokensMessageArg(allow, syntaxPointer()));
311       }
312       return 0;
313     case tokenDtgo:
314       if (!allow.groupToken(GroupToken::dataTagGroup)) {
315 	groupTokenInvalidToken(tokenDtgo, allow);
316 	return 0;
317       }
318       if (sd().datatag())
319 	message(ParserMessages::datatagNotImplemented);
320       if (currentMarkup())
321 	currentMarkup()->addDelim(Syntax::dDTGO);
322       return parseDataTagGroup(nestingLevel + 1, declInputLevel, gt);
323     case tokenGrpo:
324       if (currentMarkup())
325 	currentMarkup()->addDelim(Syntax::dGRPO);
326       switch (allow.group()) {
327       case GroupToken::modelGroup:
328 	{
329 	  ModelGroup *modelGroup;
330 	  if (!parseModelGroup(nestingLevel + 1, declInputLevel, modelGroup,
331 			       grpMode))
332 	    return 0;
333 	  gt.model = modelGroup;
334 	  gt.type = GroupToken::modelGroup;
335 	  return 1;
336 	}
337       case GroupToken::dataTagTemplateGroup:
338 	return parseDataTagTemplateGroup(nestingLevel + 1, declInputLevel, gt);
339       default:
340 	groupTokenInvalidToken(tokenGrpo, allow);
341 	return 0;
342       }
343       break;
344     case tokenRni:
345       if (!allow.groupToken(GroupToken::pcdata)) {
346 	groupTokenInvalidToken(tokenRni, allow);
347 	return 0;
348       }
349       Syntax::ReservedName rn;
350       if (!getIndicatedReservedName(&rn))
351 	return 0;
352       if (rn != Syntax::rPCDATA) {
353 	StringC token(syntax().delimGeneral(Syntax::dRNI));
354 	token += syntax().reservedName(Syntax::rPCDATA);
355 	message(ParserMessages::invalidToken, StringMessageArg(token));
356 	return 0;
357       }
358       gt.type = GroupToken::pcdata;
359       gt.contentToken = new PcdataToken;
360       return 1;
361     case tokenS:
362       if (currentMarkup()) {
363 	extendS();
364 	currentMarkup()->addS(currentInput());
365       }
366       break;
367     case tokenNameStart:
368       switch (allow.nameStart()) {
369       case GroupToken::elementToken:
370 	{
371 	  extendNameToken(syntax().namelen(), ParserMessages::nameLength);
372 	  gt.type = GroupToken::elementToken;
373 	  StringC &buffer = nameBuffer();
374 	  getCurrentToken(syntax().generalSubstTable(), buffer);
375 	  if (currentMarkup())
376 	    currentMarkup()->addName(currentInput());
377 	  const ElementType *e = lookupCreateElement(buffer);
378 	  ContentToken::OccurrenceIndicator oi
379 	    = getOccurrenceIndicator(grpMode);
380 	  gt.contentToken = new ElementToken(e, oi);
381 	  return 1;
382 	}
383       case GroupToken::name:
384       case GroupToken::nameToken:
385 	extendNameToken(syntax().namelen(),
386 			token == GroupToken::name
387 			? ParserMessages::nameLength
388 			: ParserMessages::nameTokenLength);
389 	getCurrentToken(syntax().generalSubstTable(), gt.token);
390 	gt.type = allow.nameStart();
391 	if (currentMarkup()) {
392 	  if (gt.type == GroupToken::nameToken)
393 	    currentMarkup()->addNameToken(currentInput());
394 	  else
395 	    currentMarkup()->addName(currentInput());
396 	}
397 	return 1;
398       default:
399 	groupTokenInvalidToken(tokenNameStart, allow);
400 	return 0;
401       }
402     case tokenDigit:
403     case tokenLcUcNmchar:
404       if (!allow.groupToken(GroupToken::nameToken)) {
405 	groupTokenInvalidToken(token, allow);
406 	return 0;
407       }
408       extendNameToken(syntax().namelen(), ParserMessages::nameTokenLength);
409       getCurrentToken(syntax().generalSubstTable(), gt.token);
410       gt.type = GroupToken::nameToken;
411       if (currentMarkup())
412 	currentMarkup()->addNameToken(currentInput());
413       return 1;
414     case tokenLit:
415     case tokenLita:
416       // parameter literal in data tag pattern
417       if (!allow.groupToken(GroupToken::dataTagLiteral)) {
418 	groupTokenInvalidToken(token, allow);
419 	return 0;
420       }
421       if (!parseDataTagParameterLiteral(token == tokenLita, gt.text))
422 	return 0;
423       gt.type = GroupToken::dataTagLiteral;
424       if (currentMarkup())
425 	currentMarkup()->addLiteral(gt.text);
426       return 1;
427     case tokenAnd:
428     case tokenSeq:
429     case tokenOr:
430     case tokenDtgc:
431     case tokenGrpc:
432     case tokenOpt:
433     case tokenPlus:
434     case tokenRep:
435       groupTokenInvalidToken(token, allow);
436       return 0;
437     }
438   }
439 }
440 
441 
groupTokenInvalidToken(Token token,const AllowedGroupTokens & allow)442 void Parser::groupTokenInvalidToken(Token token, const AllowedGroupTokens &allow)
443 {
444   message(ParserMessages::groupTokenInvalidToken,
445 	  TokenMessageArg(token, grpMode, syntaxPointer(), sdPointer()),
446 	  AllowedGroupTokensMessageArg(allow, syntaxPointer()));
447 }
448 
449 
// Read the connector (or closing delimiter) that follows a group token,
// scanning in grpMode.
//
// Separators and entity ends are consumed in the loop.  Any token that is
// not handled explicitly is reported through groupConnectorInvalidToken().
//
//   allow           - the connectors acceptable at this point
//   declInputLevel  - an entity end at or below this input level fails the call
//   groupInputLevel - an entity end at or below this input level is reported
//                     as groupLevel; closing the group above this level is
//                     reported as groupParameterEntityNotEnded
//   gc              - receives the connector type
//
// Returns 1 when gc.type was set, 0 on failure.
Boolean Parser::parseGroupConnector(const AllowedGroupConnectors &allow,
				    unsigned declInputLevel,
				    unsigned groupInputLevel,
				    GroupConnector &gc)
{
  for (;;) {
    Token token = getToken(grpMode);
    switch (token) {
    case tokenEe:
      if (inputLevel() <= groupInputLevel) {
	message(ParserMessages::groupLevel);
	if (inputLevel() <= declInputLevel)
	  return 0;
      }
      if (currentMarkup())
	currentMarkup()->addEntityEnd();
      popInputStack();
      break;
    case tokenS:
      // Separators are only extended and recorded when markup is collected.
      if (currentMarkup()) {
	extendS();
	currentMarkup()->addS(currentInput());
      }
      break;
    case tokenPeroGrpo:
      // NOTE(review): parseParam and parseGroupToken in this file issue
      // peroGrpoProlog when !inInstance(); here the test is inInstance() and
      // the token is then skipped.  Confirm whether this polarity is intended.
      if (inInstance()) {
	message(ParserMessages::peroGrpoProlog);
	break;
      }
      // fall through
    case tokenPeroNameStart:
      // A parameter entity reference in connector position is reported
      // unless sd().www() is set, in which case it is parsed and referenced.
      if (!sd().www())
	message(ParserMessages::groupEntityReference);
      else {
	ConstPtr<Entity> entity;
	Ptr<EntityOrigin> origin;
	if (!parseEntityReference(1, token == tokenPeroGrpo, entity, origin))
	  return 0;
	if (!entity.isNull())
	  entity->declReference(*this, origin);
      }
      break;
    case tokenUnrecognized:
      // A true result from reportNonSgmlCharacter() means keep scanning.
      if (reportNonSgmlCharacter())
	break;
      {
	message(ParserMessages::groupCharacter,
		StringMessageArg(currentToken()),
		AllowedGroupConnectorsMessageArg(allow, syntaxPointer()));
      }
      return 0;
    case tokenAnd:
      if (!allow.groupConnector(GroupConnector::andGC)) {
	groupConnectorInvalidToken(tokenAnd, allow);
	return 0;
      }
      gc.type = GroupConnector::andGC;
      if (currentMarkup())
	currentMarkup()->addDelim(Syntax::dAND);
      return 1;
    case tokenSeq:
      if (!allow.groupConnector(GroupConnector::seqGC)) {
	groupConnectorInvalidToken(tokenSeq, allow);
	return 0;
      }
      gc.type = GroupConnector::seqGC;
      if (currentMarkup())
	currentMarkup()->addDelim(Syntax::dSEQ);
      return 1;
    case tokenOr:
      if (!allow.groupConnector(GroupConnector::orGC)) {
	groupConnectorInvalidToken(tokenOr, allow);
	return 0;
      }
      gc.type = GroupConnector::orGC;
      if (currentMarkup())
	currentMarkup()->addDelim(Syntax::dOR);
      return 1;
    case tokenDtgc:
      if (!allow.groupConnector(GroupConnector::dtgcGC)) {
	groupConnectorInvalidToken(tokenDtgc, allow);
	return 0;
      }
      gc.type = GroupConnector::dtgcGC;
      // Closing at a deeper input level than the group started at is
      // reported, but the closer is still accepted.
      if (inputLevel() > groupInputLevel)
	message(ParserMessages::groupParameterEntityNotEnded);
      if (currentMarkup())
	currentMarkup()->addDelim(Syntax::dDTGC);
      return 1;
    case tokenGrpc:
      if (!allow.groupConnector(GroupConnector::grpcGC)) {
	groupConnectorInvalidToken(tokenGrpc, allow);
	return 0;
      }
      gc.type = GroupConnector::grpcGC;
      // Same input level check as for tokenDtgc above.
      if (inputLevel() > groupInputLevel)
	message(ParserMessages::groupParameterEntityNotEnded);
      if (currentMarkup())
	currentMarkup()->addDelim(Syntax::dGRPC);
      return 1;
    default:
      groupConnectorInvalidToken(token, allow);
      return 0;
    }
  }
}
556 
groupConnectorInvalidToken(Token token,const AllowedGroupConnectors & allow)557 void Parser::groupConnectorInvalidToken(Token token,
558 					const AllowedGroupConnectors &allow)
559 {
560   message(ParserMessages::connectorInvalidToken,
561 	  TokenMessageArg(token, grpMode, syntaxPointer(), sdPointer()),
562 	  AllowedGroupConnectorsMessageArg(allow, syntaxPointer()));
563 }
564 
parseElementNameGroup(unsigned declInputLevel,Param & parm)565 Boolean Parser::parseElementNameGroup(unsigned declInputLevel, Param &parm)
566 {
567   if (!parseNameGroup(declInputLevel, parm))
568     return 0;
569   parm.elementVector.resize(parm.nameTokenVector.size());
570   for (size_t i = 0; i < parm.nameTokenVector.size(); i++)
571     parm.elementVector[i] = lookupCreateElement(parm.nameTokenVector[i].name);
572   return 1;
573 }
574 
parseEntityReferenceNameGroup(Boolean & ignore)575 Boolean Parser::parseEntityReferenceNameGroup(Boolean &ignore)
576 {
577   Param parm;
578   if (!parseNameGroup(inputLevel(), parm))
579     return 0;
580   if (inInstance()) {
581     for (size_t i = 0; i < parm.nameTokenVector.size(); i++) {
582       const Lpd *lpd = lookupLpd(parm.nameTokenVector[i].name).pointer();
583       if (lpd && lpd->active()) {
584 	ignore = 0;
585 	return 1;
586       }
587     }
588   }
589   ignore = 1;
590   return 1;
591 }
592 
parseTagNameGroup(Boolean & active)593 Boolean Parser::parseTagNameGroup(Boolean &active)
594 {
595   Param parm;
596   if (!parseNameGroup(inputLevel(), parm))
597     return 0;
598   active = 0;
599   return 1;
600 }
601 
parseNameGroup(unsigned declInputLevel,Param & parm)602 Boolean Parser::parseNameGroup(unsigned declInputLevel, Param &parm)
603 {
604   static AllowedGroupTokens allowName(GroupToken::name);
605   return parseGroup(allowName, declInputLevel, parm);
606 }
607 
parseNameTokenGroup(unsigned declInputLevel,Param & parm)608 Boolean Parser::parseNameTokenGroup(unsigned declInputLevel, Param &parm)
609 {
610   static AllowedGroupTokens allowNameToken(GroupToken::nameToken);
611   return parseGroup(allowNameToken, declInputLevel, parm);
612 }
613 
614 static
groupContains(const Vector<NameToken> & vec,const StringC & str)615 Boolean groupContains(const Vector<NameToken> &vec, const StringC &str)
616 {
617   for (size_t i = 0; i < vec.size(); i++)
618     if (vec[i].name == str)
619       return 1;
620   return 0;
621 }
622 
parseGroup(const AllowedGroupTokens & allowToken,unsigned declInputLevel,Param & parm)623 Boolean Parser::parseGroup(const AllowedGroupTokens &allowToken,
624 			   unsigned declInputLevel,
625 			   Param &parm)
626 {
627   unsigned groupInputLevel = inputLevel();
628   int nDuplicates = 0;
629   Vector<NameToken> &vec = parm.nameTokenVector;
630   vec.clear();
631   GroupConnector::Type connector = GroupConnector::grpcGC;
632   GroupToken gt;
633   for (;;) {
634     if (!parseGroupToken(allowToken, 0, declInputLevel, groupInputLevel, gt))
635       return 0;
636     if (groupContains(vec, gt.token)) {
637       nDuplicates++;
638       message(ParserMessages::duplicateGroupToken,
639 	      StringMessageArg(gt.token));
640     }
641     else {
642       vec.resize(vec.size() + 1);
643       gt.token.swap(vec.back().name);
644       getCurrentToken(vec.back().origName);
645       vec.back().loc = currentLocation();
646     }
647     GroupConnector gc;
648     static AllowedGroupConnectors allowAnyConnectorGrpc(GroupConnector::orGC,
649 							GroupConnector::andGC,
650 							GroupConnector::seqGC,
651 							GroupConnector::grpcGC);
652 
653     if (!parseGroupConnector(allowAnyConnectorGrpc, declInputLevel,
654 			     groupInputLevel, gc))
655       return 0;
656     if (gc.type == GroupConnector::grpcGC)
657       break;
658     if (options().warnNameGroupNotOr) {
659       if (gc.type != GroupConnector::orGC)
660 	message(ParserMessages::nameGroupNotOr);
661     }
662     else if (options().warnShould) {
663       if (connector == GroupConnector::grpcGC)
664 	connector = gc.type;
665       else if (gc.type != connector) {
666 	message(ParserMessages::mixedConnectors);
667 	connector = gc.type;
668       }
669     }
670   }
671   if (nDuplicates + vec.size() > syntax().grpcnt())
672     message(ParserMessages::groupCount, NumberMessageArg(syntax().grpcnt()));
673   return 1;
674 }
675 
parseDataTagGroup(unsigned nestingLevel,unsigned declInputLevel,GroupToken & result)676 Boolean Parser::parseDataTagGroup(unsigned nestingLevel,
677 				  unsigned declInputLevel, GroupToken &result)
678 {
679   if (nestingLevel - 1 == syntax().grplvl())
680     message(ParserMessages::grplvl, NumberMessageArg(syntax().grplvl()));
681   unsigned groupInputLevel = inputLevel();
682   GroupToken gt;
683   static AllowedGroupTokens allowName(GroupToken::name);
684   if (!parseGroupToken(allowName, nestingLevel, declInputLevel,
685 		       groupInputLevel, gt))
686     return 0;
687   const ElementType *element = lookupCreateElement(gt.token);
688   GroupConnector gc;
689   static AllowedGroupConnectors allowSeq(GroupConnector::seqGC);
690   if (!parseGroupConnector(allowSeq, declInputLevel, groupInputLevel, gc))
691     return 0;
692   static AllowedGroupTokens
693     allowDataTagLiteralDataTagTemplateGroup(GroupToken::dataTagLiteral,
694 					    GroupToken::dataTagTemplateGroup);
695   if (!parseGroupToken(allowDataTagLiteralDataTagTemplateGroup,
696 		       nestingLevel,
697 		       declInputLevel,
698 		       groupInputLevel,
699 		       gt))
700     return 0;
701   Vector<Text> templates;
702   if (gt.type == GroupToken::dataTagTemplateGroup)
703     gt.textVector.swap(templates);
704   else {
705     templates.resize(1);
706     gt.text.swap(templates[0]);
707   }
708   static AllowedGroupConnectors allowSeqDtgc(GroupConnector::seqGC,
709 					     GroupConnector::dtgcGC);
710   if (!parseGroupConnector(allowSeqDtgc, declInputLevel, groupInputLevel, gc))
711     return 0;
712   NCVector<Owner<ContentToken> > vec(2);
713   vec[1] = new PcdataToken;
714   if (gc.type != GroupConnector::dtgcGC) {
715     static AllowedGroupTokens allowDataTagLiteral(GroupToken::dataTagLiteral);
716     if (!parseGroupToken(allowDataTagLiteral,
717 			 nestingLevel,
718 			 declInputLevel,
719 			 groupInputLevel,
720 			 gt))
721       return 0;
722     vec[0] = new DataTagElementToken(element, templates, gt.text);
723     static AllowedGroupConnectors allowDtgc(GroupConnector::dtgcGC);
724     if (!parseGroupConnector(allowDtgc, declInputLevel, groupInputLevel, gc))
725       return 0;
726   }
727   else
728     vec[0] = new DataTagElementToken(element, templates);
729   ContentToken::OccurrenceIndicator oi = getOccurrenceIndicator(grpMode);
730   result.contentToken = new DataTagGroup(vec, oi);
731   result.type = GroupToken::dataTagGroup;
732   return 1;
733 }
734 
parseDataTagTemplateGroup(unsigned nestingLevel,unsigned declInputLevel,GroupToken & result)735 Boolean Parser::parseDataTagTemplateGroup(unsigned nestingLevel,
736 					  unsigned declInputLevel,
737 					  GroupToken &result)
738 {
739   if (nestingLevel - 1 == syntax().grplvl())
740     message(ParserMessages::grplvl, NumberMessageArg(syntax().grplvl()));
741   unsigned groupInputLevel = inputLevel();
742   Vector<Text> &vec = result.textVector;
743   for (;;) {
744     GroupToken gt;
745     static AllowedGroupTokens allowDataTagLiteral(GroupToken::dataTagLiteral);
746     if (!parseGroupToken(allowDataTagLiteral,
747 			 nestingLevel,
748 			 declInputLevel,
749 			 groupInputLevel,
750 			 gt))
751       return 0;
752     if (vec.size() == syntax().grpcnt())
753       message(ParserMessages::groupCount, NumberMessageArg(syntax().grpcnt()));
754     vec.resize(vec.size() + 1);
755     gt.text.swap(vec.back());
756     static AllowedGroupConnectors allowOrGrpc(GroupConnector::orGC,
757 					      GroupConnector::grpcGC);
758     GroupConnector gc;
759     if (!parseGroupConnector(allowOrGrpc, declInputLevel, groupInputLevel, gc))
760       return 0;
761     if (gc.type == GroupConnector::grpcGC)
762       break;
763   }
764   return 1;
765 }
766 
// Parse the members of a model group up to and including its closing GRPC,
// then the occurrence indicator, and build the corresponding ModelGroup.
// (The callers in this file record the opening GRPO before calling.)
//
//   nestingLevel   - nesting level of this group; grplvl is reported when
//                    nestingLevel - 1 equals syntax().grplvl()
//   declInputLevel - passed through to the token and connector parsers
//   group          - receives a newly allocated Or/Seq/And model group
//   oiMode         - mode used to scan the occurrence indicator
//
// Returns 0 if a member or connector fails to parse, 1 otherwise.
Boolean Parser::parseModelGroup(unsigned nestingLevel, unsigned declInputLevel,
				ModelGroup *&group, Mode oiMode)
{
  if (nestingLevel - 1 == syntax().grplvl())
    message(ParserMessages::grplvl, NumberMessageArg(syntax().grplvl()));
  unsigned groupInputLevel = inputLevel();
  GroupToken gt;
  NCVector<Owner<ContentToken> > tokenVector;
  // Connector of the group; stays grpcGC for a group with a single member.
  GroupConnector::Type connector = GroupConnector::grpcGC;

  static AllowedGroupTokens allowContentToken(GroupToken::pcdata,
					      GroupToken::dataTagGroup,
					      GroupToken::elementToken,
					      GroupToken::modelGroup);
  static AllowedGroupConnectors allowAnyConnectorGrpc(GroupConnector::orGC,
						      GroupConnector::andGC,
						      GroupConnector::seqGC,
						      GroupConnector::grpcGC);

  static AllowedGroupConnectors allowOrGrpc(GroupConnector::orGC,
					    GroupConnector::grpcGC);
  static AllowedGroupConnectors allowAndGrpc(GroupConnector::andGC,
					     GroupConnector::grpcGC);
  static AllowedGroupConnectors allowSeqGrpc(GroupConnector::seqGC,
					     GroupConnector::grpcGC);
  // Any connector is accepted after the first member; after that the set is
  // narrowed to the connector actually used (or GRPC).
  const AllowedGroupConnectors *connectorp = &allowAnyConnectorGrpc;

  GroupConnector gc;
  // Set when the warnMixedContentRepOrGroup checks apply to later members.
  Boolean pcdataCheck = 0;
  do {
    if (!parseGroupToken(allowContentToken, nestingLevel, declInputLevel,
			 groupInputLevel, gt))
      return 0;
    // Take ownership of the parsed member out of gt; it is handed to
    // tokenVector below before any return path.
    ContentToken *contentToken;
    if (gt.type == GroupToken::modelGroup)
      contentToken = gt.model.extract();
    else
      contentToken = gt.contentToken.extract();
    // Exceeding grpcnt() is reported but the member is still stored.
    if (tokenVector.size() == syntax().grpcnt())
      message(ParserMessages::groupCount, NumberMessageArg(syntax().grpcnt()));
    tokenVector.resize(tokenVector.size() + 1);
    tokenVector.back() = contentToken;
    if (!parseGroupConnector(*connectorp, declInputLevel, groupInputLevel, gc))
      return 0;
    if (options().warnMixedContentRepOrGroup && gt.type == GroupToken::pcdata) {
      // #PCDATA member: it should be first and not followed by SEQ;
      // only then are the remaining members checked.
      if (tokenVector.size() != 1)
	message(ParserMessages::pcdataNotFirstInGroup);
      else if (gc.type == GroupConnector::seqGC)
	message(ParserMessages::pcdataInSeqGroup);
      else
	pcdataCheck = 1;
      if (nestingLevel != 1)
	message(ParserMessages::pcdataInNestedModelGroup);
    }
    else if (pcdataCheck) {
      // Members following a leading #PCDATA: warn about nested model groups
      // and about occurrence indicators.
      if (gt.type == GroupToken::modelGroup)
	message(ParserMessages::pcdataGroupMemberModelGroup);
      if (contentToken->occurrenceIndicator() != ContentToken::none)
	message(ParserMessages::pcdataGroupMemberOccurrenceIndicator);
    }
    if (tokenVector.size() == 1) {
      // The connector after the first member fixes the group's connector.
      connector = gc.type;
      switch (gc.type) {
      case GroupConnector::orGC:
	connectorp = &allowOrGrpc;
	break;
      case GroupConnector::seqGC:
	connectorp = &allowSeqGrpc;
	break;
      case GroupConnector::andGC:
	connectorp = &allowAndGrpc;
	if (options().warnAndGroup)
	  message(ParserMessages::andGroup);
	break;
      default:
	break;
      }
    }
  } while (gc.type != GroupConnector::grpcGC);
  ContentToken::OccurrenceIndicator oi
    = getOccurrenceIndicator(oiMode);
  switch (connector) {
  case GroupConnector::orGC:
    group = new OrModelGroup(tokenVector, oi);
    if (pcdataCheck && oi != ContentToken::rep)
      message(ParserMessages::pcdataGroupNotRep);
    break;
  case GroupConnector::grpcGC:
    // Single-member group: built as a SeqModelGroup via the fall through.
    if (pcdataCheck && oi != ContentToken::rep && oi != ContentToken::none)
      message(ParserMessages::pcdataGroupNotRep);
    // fall through
  case GroupConnector::seqGC:
    group = new SeqModelGroup(tokenVector, oi);
    break;
  case GroupConnector::andGC:
    group = new AndModelGroup(tokenVector, oi);
    break;
  default:
    // NOTE(review): group is left unassigned here while 1 is still returned;
    // presumably unreachable because the allowed connector sets above only
    // contain or/and/seq/grpc - confirm.
    break;
  }
  return 1;
}
869 
870 ContentToken::OccurrenceIndicator
getOccurrenceIndicator(Mode oiMode)871 Parser::getOccurrenceIndicator(Mode oiMode)
872 {
873   Token token = getToken(oiMode);
874   switch (token) {
875   case tokenPlus:
876     if (currentMarkup())
877       currentMarkup()->addDelim(Syntax::dPLUS);
878     return ContentToken::plus;
879   case tokenOpt:
880     if (currentMarkup())
881       currentMarkup()->addDelim(Syntax::dOPT);
882    return ContentToken::opt;
883   case tokenRep:
884     if (currentMarkup())
885       currentMarkup()->addDelim(Syntax::dREP);
886     return ContentToken::rep;
887   default:
888     currentInput()->ungetToken();
889     return ContentToken::none;
890   }
891 }
892 
parseMinimumLiteral(Boolean lita,Text & text)893 Boolean Parser::parseMinimumLiteral(Boolean lita, Text &text)
894 {
895   return parseLiteral(lita ? mlitaMode : mlitMode, mlitMode,
896 		      Syntax::referenceQuantity(Syntax::qLITLEN),
897 		      ParserMessages::minimumLiteralLength,
898 		      literalSingleSpace|literalMinimumData
899 		      |(eventsWanted().wantPrologMarkup()
900 			? literalDelimInfo
901 			: 0),
902 		      text);
903 }
904 
parseSystemIdentifier(Boolean lita,Text & text)905 Boolean Parser::parseSystemIdentifier(Boolean lita, Text &text)
906 {
907   return parseLiteral(lita ? slitaMode : slitMode, slitMode, syntax().litlen(),
908 		      ParserMessages::systemIdentifierLength,
909 		      (eventsWanted().wantPrologMarkup()
910 			? literalDelimInfo
911 			: 0), text);
912 }
913 
parseParameterLiteral(Boolean lita,Text & text)914 Boolean Parser::parseParameterLiteral(Boolean lita, Text &text)
915 {
916   return parseLiteral(lita ? plitaMode : plitMode, pliteMode, syntax().litlen(),
917 		      ParserMessages::parameterLiteralLength,
918 		      (eventsWanted().wantPrologMarkup()
919 		       ? literalDelimInfo
920 		       : 0),
921 		      text);
922 }
923 
parseDataTagParameterLiteral(Boolean lita,Text & text)924 Boolean Parser::parseDataTagParameterLiteral(Boolean lita, Text &text)
925 {
926   return parseLiteral(lita ? plitaMode : plitMode, pliteMode,
927 		      syntax().dtemplen(),
928 		      ParserMessages::dataTagPatternLiteralLength,
929 		      literalDataTag
930 		      | (eventsWanted().wantPrologMarkup()
931 			 ? literalDelimInfo
932 			 : 0),
933 		      text);
934 }
935 
parseIndicatedReservedName(const AllowedParams & allow,Param & parm)936 Boolean Parser::parseIndicatedReservedName(const AllowedParams &allow,
937 					   Param &parm)
938 {
939   Syntax::ReservedName rn;
940   if (!getIndicatedReservedName(&rn))
941     return 0;
942   if (!allow.reservedName(rn)) {
943     message(ParserMessages::invalidReservedName,
944 	    StringMessageArg(currentToken()));
945     return 0;
946   }
947   parm.type = Param::indicatedReservedName + rn;
948   return 1;
949 }
950 
parseReservedName(const AllowedParams & allow,Param & parm)951 Boolean Parser::parseReservedName(const AllowedParams &allow,
952 				  Param &parm)
953 {
954   Syntax::ReservedName rn;
955   if (!getReservedName(&rn))
956     return 0;
957   if (!allow.reservedName(rn)) {
958     message(ParserMessages::invalidReservedName,
959 	    StringMessageArg(syntax().reservedName(rn)));
960     return 0;
961   }
962   parm.type = Param::reservedName + rn;
963   return 1;
964 }
965 
966 
parseAttributeValueParam(Param & parm)967 Boolean Parser::parseAttributeValueParam(Param &parm)
968 {
969   extendNameToken(syntax().litlen() > syntax().normsep()
970 		  ? syntax().litlen() - syntax().normsep()
971 		  : 0,
972 		  ParserMessages::attributeValueLength);
973   parm.type = Param::attributeValue;
974   Text text;
975   text.addChars(currentInput()->currentTokenStart(),
976 		currentInput()->currentTokenLength(),
977 		currentLocation());
978   text.swap(parm.literalText);
979   if (currentMarkup())
980     currentMarkup()->addAttributeValue(currentInput());
981   return 1;
982 }
983 
getIndicatedReservedName(Syntax::ReservedName * result)984 Boolean Parser::getIndicatedReservedName(Syntax::ReservedName *result)
985 {
986   if (currentMarkup())
987     currentMarkup()->addDelim(Syntax::dRNI);
988   InputSource *in = currentInput();
989   in->startToken();
990   if (!syntax().isNameStartCharacter(in->tokenChar(messenger()))) {
991     message(ParserMessages::rniNameStart);
992     return 0;
993   }
994   extendNameToken(syntax().namelen(), ParserMessages::nameLength);
995   StringC &buffer = nameBuffer();
996   getCurrentToken(syntax().generalSubstTable(), buffer);
997   if (!syntax().lookupReservedName(buffer, result)) {
998     message(ParserMessages::noSuchReservedName, StringMessageArg(buffer));
999     return 0;
1000   }
1001   if (currentMarkup())
1002     currentMarkup()->addReservedName(*result, currentInput());
1003   return 1;
1004 }
1005 
getReservedName(Syntax::ReservedName * result)1006 Boolean Parser::getReservedName(Syntax::ReservedName *result)
1007 {
1008   extendNameToken(syntax().namelen(), ParserMessages::nameLength);
1009   StringC &buffer = nameBuffer();
1010   getCurrentToken(syntax().generalSubstTable(), buffer);
1011   if (!syntax().lookupReservedName(buffer, result)) {
1012     message(ParserMessages::noSuchReservedName, StringMessageArg(buffer));
1013     return 0;
1014   }
1015   if (currentMarkup())
1016     currentMarkup()->addReservedName(*result, currentInput());
1017   return 1;
1018 }
1019 
1020 
1021 #ifdef SP_NAMESPACE
1022 }
1023 #endif
1024