1 // Copyright (c) 1994 James Clark
2 // See the file COPYING for copying permission.
3
4 #include "splib.h"
5 #include "Parser.h"
6 #include "Param.h"
7 #include "Group.h"
8 #include "Markup.h"
9 #include "ParserMessages.h"
10 #include "MessageArg.h"
11 #include "TokenMessageArg.h"
12 #include "token.h"
13 #include "macros.h"
14
15 #ifdef SP_NAMESPACE
16 namespace SP_NAMESPACE {
17 #endif
18
parseParam(const AllowedParams & allow,unsigned declInputLevel,Param & parm)19 Boolean Parser::parseParam(const AllowedParams &allow,
20 unsigned declInputLevel,
21 Param &parm)
22 {
23 for (;;) {
24 Token token = getToken(allow.mainMode());
25 switch (token) {
26 case tokenUnrecognized:
27 if (reportNonSgmlCharacter())
28 break;
29 {
30 message(ParserMessages::markupDeclarationCharacter,
31 StringMessageArg(currentToken()),
32 AllowedParamsMessageArg(allow, syntaxPointer()));
33 }
34 return 0;
35 case tokenEe:
36 if (inputLevel() <= declInputLevel) {
37 message(ParserMessages::declarationLevel);
38 return 0;
39 }
40 if (currentMarkup())
41 currentMarkup()->addEntityEnd();
42 popInputStack();
43 break;
44 case tokenCom:
45 if (!parseComment(comMode))
46 return 0;
47 if (options().warnPsComment)
48 message(ParserMessages::psComment);
49 break;
50 case tokenDso:
51 if (!allow.dso()) {
52 paramInvalidToken(tokenDso, allow);
53 return 0;
54 }
55 if (currentMarkup())
56 currentMarkup()->addDelim(Syntax::dDSO);
57 parm.type = Param::dso;
58 return 1;
59 case tokenGrpo:
60 if (currentMarkup())
61 currentMarkup()->addDelim(Syntax::dGRPO);
62 switch (allow.group()) {
63 case Param::invalid:
64 paramInvalidToken(tokenGrpo, allow);
65 return 0;
66 case Param::modelGroup:
67 {
68 ModelGroup *group;
69 if (!parseModelGroup(1, declInputLevel, group, grpsufMode))
70 return 0;
71 parm.type = Param::modelGroup;
72 parm.modelGroupPtr = group;
73 }
74 break;
75 case Param::nameGroup:
76 if (!parseNameGroup(declInputLevel, parm))
77 return 0;
78 break;
79 case Param::nameTokenGroup:
80 if (!parseNameTokenGroup(declInputLevel, parm))
81 return 0;
82 break;
83 default:
84 CANNOT_HAPPEN();
85 }
86 parm.type = allow.group();
87 return 1;
88 case tokenLita:
89 case tokenLit:
90 parm.type = allow.literal();
91 parm.lita = token == tokenLita;
92 switch (allow.literal()) {
93 case Param::invalid:
94 paramInvalidToken(token, allow);
95 return 0;
96 case Param::minimumLiteral:
97 if (!parseMinimumLiteral(parm.lita, parm.literalText))
98 return 0;
99 break;
100 case Param::attributeValueLiteral:
101 if (!parseAttributeValueLiteral(parm.lita, parm.literalText))
102 return 0;
103 break;
104 case Param::tokenizedAttributeValueLiteral:
105 if (!parseTokenizedAttributeValueLiteral(parm.lita, parm.literalText))
106 return 0;
107 break;
108 case Param::systemIdentifier:
109 if (!parseSystemIdentifier(parm.lita, parm.literalText))
110 return 0;
111 break;
112 case Param::paramLiteral:
113 if (!parseParameterLiteral(parm.lita, parm.literalText))
114 return 0;
115 break;
116 }
117 if (currentMarkup())
118 currentMarkup()->addLiteral(parm.literalText);
119 return 1;
120 case tokenMdc:
121 if (!allow.mdc()) {
122 paramInvalidToken(tokenMdc, allow);
123 return 0;
124 }
125 if (inputLevel() > declInputLevel)
126 message(ParserMessages::parameterEntityNotEnded);
127 if (currentMarkup())
128 currentMarkup()->addDelim(Syntax::dMDC);
129 parm.type = Param::mdc;
130 return 1;
131 case tokenMinus:
132 parm.type = Param::minus;
133 if (currentMarkup())
134 currentMarkup()->addDelim(Syntax::dMINUS);
135 return 1;
136 case tokenMinusGrpo:
137 if (!allow.exclusions()) {
138 paramInvalidToken(tokenMinusGrpo, allow);
139 return 0;
140 }
141 if (currentMarkup()) {
142 currentMarkup()->addDelim(Syntax::dMINUS);
143 currentMarkup()->addDelim(Syntax::dGRPO);
144 }
145 parm.type = Param::exclusions;
146 return parseElementNameGroup(declInputLevel, parm);
147 case tokenPero:
148 parm.type = Param::pero;
149 if (currentMarkup())
150 currentMarkup()->addDelim(Syntax::dPERO);
151 return 1;
152 case tokenPeroGrpo:
153 if (!inInstance())
154 message(ParserMessages::peroGrpoProlog);
155 // fall through
156 case tokenPeroNameStart:
157 {
158 if (inInstance()) {
159 if (options().warnInstanceParamEntityRef)
160 message(ParserMessages::instanceParamEntityRef);
161 }
162 else {
163 if (options().warnInternalSubsetPsParamEntityRef && inputLevel() == 1)
164 message(ParserMessages::internalSubsetPsParamEntityRef);
165 }
166 ConstPtr<Entity> entity;
167 Ptr<EntityOrigin> origin;
168 if (!parseEntityReference(1, token == tokenPeroGrpo, entity, origin))
169 return 0;
170 if (!entity.isNull())
171 entity->declReference(*this, origin);
172 }
173 break;
174 case tokenPlusGrpo:
175 if (!allow.inclusions()) {
176 paramInvalidToken(tokenPlusGrpo, allow);
177 return 0;
178 }
179 if (currentMarkup()) {
180 currentMarkup()->addDelim(Syntax::dPLUS);
181 currentMarkup()->addDelim(Syntax::dGRPO);
182 }
183 parm.type = Param::inclusions;
184 return parseElementNameGroup(declInputLevel, parm);
185 case tokenRni:
186 if (!allow.rni()) {
187 paramInvalidToken(tokenRni, allow);
188 return 0;
189 }
190 return parseIndicatedReservedName(allow, parm);
191 case tokenS:
192 if (currentMarkup())
193 currentMarkup()->addS(currentChar());
194 break;
195 case tokenNameStart:
196 switch (allow.nameStart()) {
197 case Param::invalid:
198 paramInvalidToken(tokenNameStart, allow);
199 return 0;
200 case Param::reservedName:
201 return parseReservedName(allow, parm);
202 case Param::name:
203 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
204 parm.type = Param::name;
205 getCurrentToken(syntax().generalSubstTable(), parm.token);
206 if (currentMarkup())
207 currentMarkup()->addName(currentInput());
208 return 1;
209 case Param::entityName:
210 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
211 parm.type = Param::entityName;
212 getCurrentToken(syntax().entitySubstTable(), parm.token);
213 if (currentMarkup())
214 currentMarkup()->addName(currentInput());
215 return 1;
216 case Param::paramEntityName:
217 extendNameToken(syntax().penamelen(),
218 ParserMessages::parameterEntityNameLength);
219 parm.type = Param::paramEntityName;
220 getCurrentToken(syntax().entitySubstTable(), parm.token);
221 if (currentMarkup())
222 currentMarkup()->addName(currentInput());
223 return 1;
224 case Param::attributeValue:
225 return parseAttributeValueParam(parm);
226 }
227 break;
228 case tokenDigit:
229 switch (allow.digit()) {
230 case Param::invalid:
231 paramInvalidToken(tokenDigit, allow);
232 return 0;
233 case Param::number:
234 extendNumber(syntax().namelen(), ParserMessages::numberLength);
235 parm.type = Param::number;
236 getCurrentToken(parm.token);
237 if (currentMarkup())
238 currentMarkup()->addNumber(currentInput());
239 return 1;
240 case Param::attributeValue:
241 return parseAttributeValueParam(parm);
242 }
243 break;
244 case tokenLcUcNmchar:
245 switch (allow.nmchar()) {
246 case Param::invalid:
247 paramInvalidToken(tokenLcUcNmchar, allow);
248 return 0;
249 case Param::attributeValue:
250 return parseAttributeValueParam(parm);
251 }
252 break;
253 default:
254 CANNOT_HAPPEN();
255 }
256 }
257 }
258
paramInvalidToken(Token token,const AllowedParams & allow)259 void Parser::paramInvalidToken(Token token, const AllowedParams &allow)
260 {
261 message(ParserMessages::paramInvalidToken,
262 TokenMessageArg(token, allow.mainMode(),
263 syntaxPointer(), sdPointer()),
264 AllowedParamsMessageArg(allow, syntaxPointer()));
265 }
266
parseGroupToken(const AllowedGroupTokens & allow,unsigned nestingLevel,unsigned declInputLevel,unsigned groupInputLevel,GroupToken & gt)267 Boolean Parser::parseGroupToken(const AllowedGroupTokens &allow,
268 unsigned nestingLevel,
269 unsigned declInputLevel,
270 unsigned groupInputLevel,
271 GroupToken >)
272 {
273 for (;;) {
274 Token token = getToken(grpMode);
275 switch (token) {
276 case tokenEe:
277 if (inputLevel() <= groupInputLevel) {
278 message(ParserMessages::groupLevel);
279 if (inputLevel() <= declInputLevel)
280 return 0;
281 }
282 else if (!sd().www())
283 message(ParserMessages::groupEntityEnd);
284 if (currentMarkup())
285 currentMarkup()->addEntityEnd();
286 popInputStack();
287 break;
288 case tokenPeroGrpo:
289 if (!inInstance())
290 message(ParserMessages::peroGrpoProlog);
291 // fall through
292 case tokenPeroNameStart:
293 {
294 if (options().warnInternalSubsetTsParamEntityRef && inputLevel() == 1)
295 message(ParserMessages::internalSubsetTsParamEntityRef);
296 ConstPtr<Entity> entity;
297 Ptr<EntityOrigin> origin;
298 if (!parseEntityReference(1, token == tokenPeroGrpo, entity, origin))
299 return 0;
300 if (!entity.isNull())
301 entity->declReference(*this, origin);
302 }
303 break;
304 case tokenUnrecognized:
305 if (reportNonSgmlCharacter())
306 break;
307 {
308 message(ParserMessages::groupCharacter,
309 StringMessageArg(currentToken()),
310 AllowedGroupTokensMessageArg(allow, syntaxPointer()));
311 }
312 return 0;
313 case tokenDtgo:
314 if (!allow.groupToken(GroupToken::dataTagGroup)) {
315 groupTokenInvalidToken(tokenDtgo, allow);
316 return 0;
317 }
318 if (sd().datatag())
319 message(ParserMessages::datatagNotImplemented);
320 if (currentMarkup())
321 currentMarkup()->addDelim(Syntax::dDTGO);
322 return parseDataTagGroup(nestingLevel + 1, declInputLevel, gt);
323 case tokenGrpo:
324 if (currentMarkup())
325 currentMarkup()->addDelim(Syntax::dGRPO);
326 switch (allow.group()) {
327 case GroupToken::modelGroup:
328 {
329 ModelGroup *modelGroup;
330 if (!parseModelGroup(nestingLevel + 1, declInputLevel, modelGroup,
331 grpMode))
332 return 0;
333 gt.model = modelGroup;
334 gt.type = GroupToken::modelGroup;
335 return 1;
336 }
337 case GroupToken::dataTagTemplateGroup:
338 return parseDataTagTemplateGroup(nestingLevel + 1, declInputLevel, gt);
339 default:
340 groupTokenInvalidToken(tokenGrpo, allow);
341 return 0;
342 }
343 break;
344 case tokenRni:
345 if (!allow.groupToken(GroupToken::pcdata)) {
346 groupTokenInvalidToken(tokenRni, allow);
347 return 0;
348 }
349 Syntax::ReservedName rn;
350 if (!getIndicatedReservedName(&rn))
351 return 0;
352 if (rn != Syntax::rPCDATA) {
353 StringC token(syntax().delimGeneral(Syntax::dRNI));
354 token += syntax().reservedName(Syntax::rPCDATA);
355 message(ParserMessages::invalidToken, StringMessageArg(token));
356 return 0;
357 }
358 gt.type = GroupToken::pcdata;
359 gt.contentToken = new PcdataToken;
360 return 1;
361 case tokenS:
362 if (currentMarkup()) {
363 extendS();
364 currentMarkup()->addS(currentInput());
365 }
366 break;
367 case tokenNameStart:
368 switch (allow.nameStart()) {
369 case GroupToken::elementToken:
370 {
371 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
372 gt.type = GroupToken::elementToken;
373 StringC &buffer = nameBuffer();
374 getCurrentToken(syntax().generalSubstTable(), buffer);
375 if (currentMarkup())
376 currentMarkup()->addName(currentInput());
377 const ElementType *e = lookupCreateElement(buffer);
378 ContentToken::OccurrenceIndicator oi
379 = getOccurrenceIndicator(grpMode);
380 gt.contentToken = new ElementToken(e, oi);
381 return 1;
382 }
383 case GroupToken::name:
384 case GroupToken::nameToken:
385 extendNameToken(syntax().namelen(),
386 token == GroupToken::name
387 ? ParserMessages::nameLength
388 : ParserMessages::nameTokenLength);
389 getCurrentToken(syntax().generalSubstTable(), gt.token);
390 gt.type = allow.nameStart();
391 if (currentMarkup()) {
392 if (gt.type == GroupToken::nameToken)
393 currentMarkup()->addNameToken(currentInput());
394 else
395 currentMarkup()->addName(currentInput());
396 }
397 return 1;
398 default:
399 groupTokenInvalidToken(tokenNameStart, allow);
400 return 0;
401 }
402 case tokenDigit:
403 case tokenLcUcNmchar:
404 if (!allow.groupToken(GroupToken::nameToken)) {
405 groupTokenInvalidToken(token, allow);
406 return 0;
407 }
408 extendNameToken(syntax().namelen(), ParserMessages::nameTokenLength);
409 getCurrentToken(syntax().generalSubstTable(), gt.token);
410 gt.type = GroupToken::nameToken;
411 if (currentMarkup())
412 currentMarkup()->addNameToken(currentInput());
413 return 1;
414 case tokenLit:
415 case tokenLita:
416 // parameter literal in data tag pattern
417 if (!allow.groupToken(GroupToken::dataTagLiteral)) {
418 groupTokenInvalidToken(token, allow);
419 return 0;
420 }
421 if (!parseDataTagParameterLiteral(token == tokenLita, gt.text))
422 return 0;
423 gt.type = GroupToken::dataTagLiteral;
424 if (currentMarkup())
425 currentMarkup()->addLiteral(gt.text);
426 return 1;
427 case tokenAnd:
428 case tokenSeq:
429 case tokenOr:
430 case tokenDtgc:
431 case tokenGrpc:
432 case tokenOpt:
433 case tokenPlus:
434 case tokenRep:
435 groupTokenInvalidToken(token, allow);
436 return 0;
437 }
438 }
439 }
440
441
groupTokenInvalidToken(Token token,const AllowedGroupTokens & allow)442 void Parser::groupTokenInvalidToken(Token token, const AllowedGroupTokens &allow)
443 {
444 message(ParserMessages::groupTokenInvalidToken,
445 TokenMessageArg(token, grpMode, syntaxPointer(), sdPointer()),
446 AllowedGroupTokensMessageArg(allow, syntaxPointer()));
447 }
448
449
parseGroupConnector(const AllowedGroupConnectors & allow,unsigned declInputLevel,unsigned groupInputLevel,GroupConnector & gc)450 Boolean Parser::parseGroupConnector(const AllowedGroupConnectors &allow,
451 unsigned declInputLevel,
452 unsigned groupInputLevel,
453 GroupConnector &gc)
454 {
455 for (;;) {
456 Token token = getToken(grpMode);
457 switch (token) {
458 case tokenEe:
459 if (inputLevel() <= groupInputLevel) {
460 message(ParserMessages::groupLevel);
461 if (inputLevel() <= declInputLevel)
462 return 0;
463 }
464 if (currentMarkup())
465 currentMarkup()->addEntityEnd();
466 popInputStack();
467 break;
468 case tokenS:
469 if (currentMarkup()) {
470 extendS();
471 currentMarkup()->addS(currentInput());
472 }
473 break;
474 case tokenPeroGrpo:
475 if (inInstance()) {
476 message(ParserMessages::peroGrpoProlog);
477 break;
478 }
479 // fall through
480 case tokenPeroNameStart:
481 if (!sd().www())
482 message(ParserMessages::groupEntityReference);
483 else {
484 ConstPtr<Entity> entity;
485 Ptr<EntityOrigin> origin;
486 if (!parseEntityReference(1, token == tokenPeroGrpo, entity, origin))
487 return 0;
488 if (!entity.isNull())
489 entity->declReference(*this, origin);
490 }
491 break;
492 case tokenUnrecognized:
493 if (reportNonSgmlCharacter())
494 break;
495 {
496 message(ParserMessages::groupCharacter,
497 StringMessageArg(currentToken()),
498 AllowedGroupConnectorsMessageArg(allow, syntaxPointer()));
499 }
500 return 0;
501 case tokenAnd:
502 if (!allow.groupConnector(GroupConnector::andGC)) {
503 groupConnectorInvalidToken(tokenAnd, allow);
504 return 0;
505 }
506 gc.type = GroupConnector::andGC;
507 if (currentMarkup())
508 currentMarkup()->addDelim(Syntax::dAND);
509 return 1;
510 case tokenSeq:
511 if (!allow.groupConnector(GroupConnector::seqGC)) {
512 groupConnectorInvalidToken(tokenSeq, allow);
513 return 0;
514 }
515 gc.type = GroupConnector::seqGC;
516 if (currentMarkup())
517 currentMarkup()->addDelim(Syntax::dSEQ);
518 return 1;
519 case tokenOr:
520 if (!allow.groupConnector(GroupConnector::orGC)) {
521 groupConnectorInvalidToken(tokenOr, allow);
522 return 0;
523 }
524 gc.type = GroupConnector::orGC;
525 if (currentMarkup())
526 currentMarkup()->addDelim(Syntax::dOR);
527 return 1;
528 case tokenDtgc:
529 if (!allow.groupConnector(GroupConnector::dtgcGC)) {
530 groupConnectorInvalidToken(tokenDtgc, allow);
531 return 0;
532 }
533 gc.type = GroupConnector::dtgcGC;
534 if (inputLevel() > groupInputLevel)
535 message(ParserMessages::groupParameterEntityNotEnded);
536 if (currentMarkup())
537 currentMarkup()->addDelim(Syntax::dDTGC);
538 return 1;
539 case tokenGrpc:
540 if (!allow.groupConnector(GroupConnector::grpcGC)) {
541 groupConnectorInvalidToken(tokenGrpc, allow);
542 return 0;
543 }
544 gc.type = GroupConnector::grpcGC;
545 if (inputLevel() > groupInputLevel)
546 message(ParserMessages::groupParameterEntityNotEnded);
547 if (currentMarkup())
548 currentMarkup()->addDelim(Syntax::dGRPC);
549 return 1;
550 default:
551 groupConnectorInvalidToken(token, allow);
552 return 0;
553 }
554 }
555 }
556
groupConnectorInvalidToken(Token token,const AllowedGroupConnectors & allow)557 void Parser::groupConnectorInvalidToken(Token token,
558 const AllowedGroupConnectors &allow)
559 {
560 message(ParserMessages::connectorInvalidToken,
561 TokenMessageArg(token, grpMode, syntaxPointer(), sdPointer()),
562 AllowedGroupConnectorsMessageArg(allow, syntaxPointer()));
563 }
564
parseElementNameGroup(unsigned declInputLevel,Param & parm)565 Boolean Parser::parseElementNameGroup(unsigned declInputLevel, Param &parm)
566 {
567 if (!parseNameGroup(declInputLevel, parm))
568 return 0;
569 parm.elementVector.resize(parm.nameTokenVector.size());
570 for (size_t i = 0; i < parm.nameTokenVector.size(); i++)
571 parm.elementVector[i] = lookupCreateElement(parm.nameTokenVector[i].name);
572 return 1;
573 }
574
parseEntityReferenceNameGroup(Boolean & ignore)575 Boolean Parser::parseEntityReferenceNameGroup(Boolean &ignore)
576 {
577 Param parm;
578 if (!parseNameGroup(inputLevel(), parm))
579 return 0;
580 if (inInstance()) {
581 for (size_t i = 0; i < parm.nameTokenVector.size(); i++) {
582 const Lpd *lpd = lookupLpd(parm.nameTokenVector[i].name).pointer();
583 if (lpd && lpd->active()) {
584 ignore = 0;
585 return 1;
586 }
587 }
588 }
589 ignore = 1;
590 return 1;
591 }
592
parseTagNameGroup(Boolean & active)593 Boolean Parser::parseTagNameGroup(Boolean &active)
594 {
595 Param parm;
596 if (!parseNameGroup(inputLevel(), parm))
597 return 0;
598 active = 0;
599 return 1;
600 }
601
parseNameGroup(unsigned declInputLevel,Param & parm)602 Boolean Parser::parseNameGroup(unsigned declInputLevel, Param &parm)
603 {
604 static AllowedGroupTokens allowName(GroupToken::name);
605 return parseGroup(allowName, declInputLevel, parm);
606 }
607
parseNameTokenGroup(unsigned declInputLevel,Param & parm)608 Boolean Parser::parseNameTokenGroup(unsigned declInputLevel, Param &parm)
609 {
610 static AllowedGroupTokens allowNameToken(GroupToken::nameToken);
611 return parseGroup(allowNameToken, declInputLevel, parm);
612 }
613
614 static
groupContains(const Vector<NameToken> & vec,const StringC & str)615 Boolean groupContains(const Vector<NameToken> &vec, const StringC &str)
616 {
617 for (size_t i = 0; i < vec.size(); i++)
618 if (vec[i].name == str)
619 return 1;
620 return 0;
621 }
622
parseGroup(const AllowedGroupTokens & allowToken,unsigned declInputLevel,Param & parm)623 Boolean Parser::parseGroup(const AllowedGroupTokens &allowToken,
624 unsigned declInputLevel,
625 Param &parm)
626 {
627 unsigned groupInputLevel = inputLevel();
628 int nDuplicates = 0;
629 Vector<NameToken> &vec = parm.nameTokenVector;
630 vec.clear();
631 GroupConnector::Type connector = GroupConnector::grpcGC;
632 GroupToken gt;
633 for (;;) {
634 if (!parseGroupToken(allowToken, 0, declInputLevel, groupInputLevel, gt))
635 return 0;
636 if (groupContains(vec, gt.token)) {
637 nDuplicates++;
638 message(ParserMessages::duplicateGroupToken,
639 StringMessageArg(gt.token));
640 }
641 else {
642 vec.resize(vec.size() + 1);
643 gt.token.swap(vec.back().name);
644 getCurrentToken(vec.back().origName);
645 vec.back().loc = currentLocation();
646 }
647 GroupConnector gc;
648 static AllowedGroupConnectors allowAnyConnectorGrpc(GroupConnector::orGC,
649 GroupConnector::andGC,
650 GroupConnector::seqGC,
651 GroupConnector::grpcGC);
652
653 if (!parseGroupConnector(allowAnyConnectorGrpc, declInputLevel,
654 groupInputLevel, gc))
655 return 0;
656 if (gc.type == GroupConnector::grpcGC)
657 break;
658 if (options().warnNameGroupNotOr) {
659 if (gc.type != GroupConnector::orGC)
660 message(ParserMessages::nameGroupNotOr);
661 }
662 else if (options().warnShould) {
663 if (connector == GroupConnector::grpcGC)
664 connector = gc.type;
665 else if (gc.type != connector) {
666 message(ParserMessages::mixedConnectors);
667 connector = gc.type;
668 }
669 }
670 }
671 if (nDuplicates + vec.size() > syntax().grpcnt())
672 message(ParserMessages::groupCount, NumberMessageArg(syntax().grpcnt()));
673 return 1;
674 }
675
parseDataTagGroup(unsigned nestingLevel,unsigned declInputLevel,GroupToken & result)676 Boolean Parser::parseDataTagGroup(unsigned nestingLevel,
677 unsigned declInputLevel, GroupToken &result)
678 {
679 if (nestingLevel - 1 == syntax().grplvl())
680 message(ParserMessages::grplvl, NumberMessageArg(syntax().grplvl()));
681 unsigned groupInputLevel = inputLevel();
682 GroupToken gt;
683 static AllowedGroupTokens allowName(GroupToken::name);
684 if (!parseGroupToken(allowName, nestingLevel, declInputLevel,
685 groupInputLevel, gt))
686 return 0;
687 const ElementType *element = lookupCreateElement(gt.token);
688 GroupConnector gc;
689 static AllowedGroupConnectors allowSeq(GroupConnector::seqGC);
690 if (!parseGroupConnector(allowSeq, declInputLevel, groupInputLevel, gc))
691 return 0;
692 static AllowedGroupTokens
693 allowDataTagLiteralDataTagTemplateGroup(GroupToken::dataTagLiteral,
694 GroupToken::dataTagTemplateGroup);
695 if (!parseGroupToken(allowDataTagLiteralDataTagTemplateGroup,
696 nestingLevel,
697 declInputLevel,
698 groupInputLevel,
699 gt))
700 return 0;
701 Vector<Text> templates;
702 if (gt.type == GroupToken::dataTagTemplateGroup)
703 gt.textVector.swap(templates);
704 else {
705 templates.resize(1);
706 gt.text.swap(templates[0]);
707 }
708 static AllowedGroupConnectors allowSeqDtgc(GroupConnector::seqGC,
709 GroupConnector::dtgcGC);
710 if (!parseGroupConnector(allowSeqDtgc, declInputLevel, groupInputLevel, gc))
711 return 0;
712 NCVector<Owner<ContentToken> > vec(2);
713 vec[1] = new PcdataToken;
714 if (gc.type != GroupConnector::dtgcGC) {
715 static AllowedGroupTokens allowDataTagLiteral(GroupToken::dataTagLiteral);
716 if (!parseGroupToken(allowDataTagLiteral,
717 nestingLevel,
718 declInputLevel,
719 groupInputLevel,
720 gt))
721 return 0;
722 vec[0] = new DataTagElementToken(element, templates, gt.text);
723 static AllowedGroupConnectors allowDtgc(GroupConnector::dtgcGC);
724 if (!parseGroupConnector(allowDtgc, declInputLevel, groupInputLevel, gc))
725 return 0;
726 }
727 else
728 vec[0] = new DataTagElementToken(element, templates);
729 ContentToken::OccurrenceIndicator oi = getOccurrenceIndicator(grpMode);
730 result.contentToken = new DataTagGroup(vec, oi);
731 result.type = GroupToken::dataTagGroup;
732 return 1;
733 }
734
parseDataTagTemplateGroup(unsigned nestingLevel,unsigned declInputLevel,GroupToken & result)735 Boolean Parser::parseDataTagTemplateGroup(unsigned nestingLevel,
736 unsigned declInputLevel,
737 GroupToken &result)
738 {
739 if (nestingLevel - 1 == syntax().grplvl())
740 message(ParserMessages::grplvl, NumberMessageArg(syntax().grplvl()));
741 unsigned groupInputLevel = inputLevel();
742 Vector<Text> &vec = result.textVector;
743 for (;;) {
744 GroupToken gt;
745 static AllowedGroupTokens allowDataTagLiteral(GroupToken::dataTagLiteral);
746 if (!parseGroupToken(allowDataTagLiteral,
747 nestingLevel,
748 declInputLevel,
749 groupInputLevel,
750 gt))
751 return 0;
752 if (vec.size() == syntax().grpcnt())
753 message(ParserMessages::groupCount, NumberMessageArg(syntax().grpcnt()));
754 vec.resize(vec.size() + 1);
755 gt.text.swap(vec.back());
756 static AllowedGroupConnectors allowOrGrpc(GroupConnector::orGC,
757 GroupConnector::grpcGC);
758 GroupConnector gc;
759 if (!parseGroupConnector(allowOrGrpc, declInputLevel, groupInputLevel, gc))
760 return 0;
761 if (gc.type == GroupConnector::grpcGC)
762 break;
763 }
764 return 1;
765 }
766
parseModelGroup(unsigned nestingLevel,unsigned declInputLevel,ModelGroup * & group,Mode oiMode)767 Boolean Parser::parseModelGroup(unsigned nestingLevel, unsigned declInputLevel,
768 ModelGroup *&group, Mode oiMode)
769 {
770 if (nestingLevel - 1 == syntax().grplvl())
771 message(ParserMessages::grplvl, NumberMessageArg(syntax().grplvl()));
772 unsigned groupInputLevel = inputLevel();
773 GroupToken gt;
774 NCVector<Owner<ContentToken> > tokenVector;
775 GroupConnector::Type connector = GroupConnector::grpcGC;
776
777 static AllowedGroupTokens allowContentToken(GroupToken::pcdata,
778 GroupToken::dataTagGroup,
779 GroupToken::elementToken,
780 GroupToken::modelGroup);
781 static AllowedGroupConnectors allowAnyConnectorGrpc(GroupConnector::orGC,
782 GroupConnector::andGC,
783 GroupConnector::seqGC,
784 GroupConnector::grpcGC);
785
786 static AllowedGroupConnectors allowOrGrpc(GroupConnector::orGC,
787 GroupConnector::grpcGC);
788 static AllowedGroupConnectors allowAndGrpc(GroupConnector::andGC,
789 GroupConnector::grpcGC);
790 static AllowedGroupConnectors allowSeqGrpc(GroupConnector::seqGC,
791 GroupConnector::grpcGC);
792 const AllowedGroupConnectors *connectorp = &allowAnyConnectorGrpc;
793
794 GroupConnector gc;
795 Boolean pcdataCheck = 0;
796 do {
797 if (!parseGroupToken(allowContentToken, nestingLevel, declInputLevel,
798 groupInputLevel, gt))
799 return 0;
800 ContentToken *contentToken;
801 if (gt.type == GroupToken::modelGroup)
802 contentToken = gt.model.extract();
803 else
804 contentToken = gt.contentToken.extract();
805 if (tokenVector.size() == syntax().grpcnt())
806 message(ParserMessages::groupCount, NumberMessageArg(syntax().grpcnt()));
807 tokenVector.resize(tokenVector.size() + 1);
808 tokenVector.back() = contentToken;
809 if (!parseGroupConnector(*connectorp, declInputLevel, groupInputLevel, gc))
810 return 0;
811 if (options().warnMixedContentRepOrGroup && gt.type == GroupToken::pcdata) {
812 if (tokenVector.size() != 1)
813 message(ParserMessages::pcdataNotFirstInGroup);
814 else if (gc.type == GroupConnector::seqGC)
815 message(ParserMessages::pcdataInSeqGroup);
816 else
817 pcdataCheck = 1;
818 if (nestingLevel != 1)
819 message(ParserMessages::pcdataInNestedModelGroup);
820 }
821 else if (pcdataCheck) {
822 if (gt.type == GroupToken::modelGroup)
823 message(ParserMessages::pcdataGroupMemberModelGroup);
824 if (contentToken->occurrenceIndicator() != ContentToken::none)
825 message(ParserMessages::pcdataGroupMemberOccurrenceIndicator);
826 }
827 if (tokenVector.size() == 1) {
828 connector = gc.type;
829 switch (gc.type) {
830 case GroupConnector::orGC:
831 connectorp = &allowOrGrpc;
832 break;
833 case GroupConnector::seqGC:
834 connectorp = &allowSeqGrpc;
835 break;
836 case GroupConnector::andGC:
837 connectorp = &allowAndGrpc;
838 if (options().warnAndGroup)
839 message(ParserMessages::andGroup);
840 break;
841 default:
842 break;
843 }
844 }
845 } while (gc.type != GroupConnector::grpcGC);
846 ContentToken::OccurrenceIndicator oi
847 = getOccurrenceIndicator(oiMode);
848 switch (connector) {
849 case GroupConnector::orGC:
850 group = new OrModelGroup(tokenVector, oi);
851 if (pcdataCheck && oi != ContentToken::rep)
852 message(ParserMessages::pcdataGroupNotRep);
853 break;
854 case GroupConnector::grpcGC:
855 if (pcdataCheck && oi != ContentToken::rep && oi != ContentToken::none)
856 message(ParserMessages::pcdataGroupNotRep);
857 // fall through
858 case GroupConnector::seqGC:
859 group = new SeqModelGroup(tokenVector, oi);
860 break;
861 case GroupConnector::andGC:
862 group = new AndModelGroup(tokenVector, oi);
863 break;
864 default:
865 break;
866 }
867 return 1;
868 }
869
870 ContentToken::OccurrenceIndicator
getOccurrenceIndicator(Mode oiMode)871 Parser::getOccurrenceIndicator(Mode oiMode)
872 {
873 Token token = getToken(oiMode);
874 switch (token) {
875 case tokenPlus:
876 if (currentMarkup())
877 currentMarkup()->addDelim(Syntax::dPLUS);
878 return ContentToken::plus;
879 case tokenOpt:
880 if (currentMarkup())
881 currentMarkup()->addDelim(Syntax::dOPT);
882 return ContentToken::opt;
883 case tokenRep:
884 if (currentMarkup())
885 currentMarkup()->addDelim(Syntax::dREP);
886 return ContentToken::rep;
887 default:
888 currentInput()->ungetToken();
889 return ContentToken::none;
890 }
891 }
892
parseMinimumLiteral(Boolean lita,Text & text)893 Boolean Parser::parseMinimumLiteral(Boolean lita, Text &text)
894 {
895 return parseLiteral(lita ? mlitaMode : mlitMode, mlitMode,
896 Syntax::referenceQuantity(Syntax::qLITLEN),
897 ParserMessages::minimumLiteralLength,
898 literalSingleSpace|literalMinimumData
899 |(eventsWanted().wantPrologMarkup()
900 ? literalDelimInfo
901 : 0),
902 text);
903 }
904
parseSystemIdentifier(Boolean lita,Text & text)905 Boolean Parser::parseSystemIdentifier(Boolean lita, Text &text)
906 {
907 return parseLiteral(lita ? slitaMode : slitMode, slitMode, syntax().litlen(),
908 ParserMessages::systemIdentifierLength,
909 (eventsWanted().wantPrologMarkup()
910 ? literalDelimInfo
911 : 0), text);
912 }
913
parseParameterLiteral(Boolean lita,Text & text)914 Boolean Parser::parseParameterLiteral(Boolean lita, Text &text)
915 {
916 return parseLiteral(lita ? plitaMode : plitMode, pliteMode, syntax().litlen(),
917 ParserMessages::parameterLiteralLength,
918 (eventsWanted().wantPrologMarkup()
919 ? literalDelimInfo
920 : 0),
921 text);
922 }
923
parseDataTagParameterLiteral(Boolean lita,Text & text)924 Boolean Parser::parseDataTagParameterLiteral(Boolean lita, Text &text)
925 {
926 return parseLiteral(lita ? plitaMode : plitMode, pliteMode,
927 syntax().dtemplen(),
928 ParserMessages::dataTagPatternLiteralLength,
929 literalDataTag
930 | (eventsWanted().wantPrologMarkup()
931 ? literalDelimInfo
932 : 0),
933 text);
934 }
935
parseIndicatedReservedName(const AllowedParams & allow,Param & parm)936 Boolean Parser::parseIndicatedReservedName(const AllowedParams &allow,
937 Param &parm)
938 {
939 Syntax::ReservedName rn;
940 if (!getIndicatedReservedName(&rn))
941 return 0;
942 if (!allow.reservedName(rn)) {
943 message(ParserMessages::invalidReservedName,
944 StringMessageArg(currentToken()));
945 return 0;
946 }
947 parm.type = Param::indicatedReservedName + rn;
948 return 1;
949 }
950
parseReservedName(const AllowedParams & allow,Param & parm)951 Boolean Parser::parseReservedName(const AllowedParams &allow,
952 Param &parm)
953 {
954 Syntax::ReservedName rn;
955 if (!getReservedName(&rn))
956 return 0;
957 if (!allow.reservedName(rn)) {
958 message(ParserMessages::invalidReservedName,
959 StringMessageArg(syntax().reservedName(rn)));
960 return 0;
961 }
962 parm.type = Param::reservedName + rn;
963 return 1;
964 }
965
966
parseAttributeValueParam(Param & parm)967 Boolean Parser::parseAttributeValueParam(Param &parm)
968 {
969 extendNameToken(syntax().litlen() > syntax().normsep()
970 ? syntax().litlen() - syntax().normsep()
971 : 0,
972 ParserMessages::attributeValueLength);
973 parm.type = Param::attributeValue;
974 Text text;
975 text.addChars(currentInput()->currentTokenStart(),
976 currentInput()->currentTokenLength(),
977 currentLocation());
978 text.swap(parm.literalText);
979 if (currentMarkup())
980 currentMarkup()->addAttributeValue(currentInput());
981 return 1;
982 }
983
getIndicatedReservedName(Syntax::ReservedName * result)984 Boolean Parser::getIndicatedReservedName(Syntax::ReservedName *result)
985 {
986 if (currentMarkup())
987 currentMarkup()->addDelim(Syntax::dRNI);
988 InputSource *in = currentInput();
989 in->startToken();
990 if (!syntax().isNameStartCharacter(in->tokenChar(messenger()))) {
991 message(ParserMessages::rniNameStart);
992 return 0;
993 }
994 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
995 StringC &buffer = nameBuffer();
996 getCurrentToken(syntax().generalSubstTable(), buffer);
997 if (!syntax().lookupReservedName(buffer, result)) {
998 message(ParserMessages::noSuchReservedName, StringMessageArg(buffer));
999 return 0;
1000 }
1001 if (currentMarkup())
1002 currentMarkup()->addReservedName(*result, currentInput());
1003 return 1;
1004 }
1005
getReservedName(Syntax::ReservedName * result)1006 Boolean Parser::getReservedName(Syntax::ReservedName *result)
1007 {
1008 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
1009 StringC &buffer = nameBuffer();
1010 getCurrentToken(syntax().generalSubstTable(), buffer);
1011 if (!syntax().lookupReservedName(buffer, result)) {
1012 message(ParserMessages::noSuchReservedName, StringMessageArg(buffer));
1013 return 0;
1014 }
1015 if (currentMarkup())
1016 currentMarkup()->addReservedName(*result, currentInput());
1017 return 1;
1018 }
1019
1020
1021 #ifdef SP_NAMESPACE
1022 }
1023 #endif
1024