1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the  "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 #if !defined(XPATHPROCESSORIMPL_HEADER_GUARD_1357924680)
19 #define XPATHPROCESSORIMPL_HEADER_GUARD_1357924680
20 
21 
22 
23 // Base header file.  Must be first.
24 #include <xalanc/XPath/XPathDefinitions.hpp>
25 
26 
27 
28 #include <cstdlib>
29 
30 
31 
32 #include <xalanc/XalanDOM/XalanDOMString.hpp>
33 
34 
35 
36 #include <xalanc/Include/XalanVector.hpp>
37 #include <xalanc/Include/XalanMap.hpp>
38 
39 
40 
41 #include "xalanc/PlatformSupport/XalanMessageLoader.hpp"
42 
43 
44 
45 // Base class header file...
46 #include <xalanc/XPath/XPathProcessor.hpp>
47 
48 
49 
50 #include <xalanc/XPath/XPath.hpp>
51 
52 
53 
54 namespace XALAN_CPP_NAMESPACE {
55 
56 
57 
58 class XalanNode;
59 
60 
61 
62 /**
63  * The XPathProcessorImpl class responsibilities include tokenizing and
64  * parsing the XPath expression, and acting as a general interface to XPaths.
65  */
66 class XALAN_XPATH_EXPORT XPathProcessorImpl : public XPathProcessor
67 {
68 public:
69 
70     typedef XalanMap<XalanDOMString,
71                 const XalanDOMString*>  StringToStringMapType;
72 
73     typedef XalanVector<bool>               BoolVectorType;
74 
75     typedef XalanDOMString::size_type   t_size_type;
76 
77     XPathProcessorImpl(MemoryManager& theManager XALAN_DEFAULT_MEMMGR);
78 
79     virtual
80     ~XPathProcessorImpl();
81 
82 
83     static XPathProcessorImpl*
84     create(MemoryManager& theManager);
85     // These are inherited from XPathProcessor...
86 
87     virtual void
88     initXPath(
89             XPath&                      pathObj,
90             XPathConstructionContext&   constructionContext,
91             const XalanDOMString&       expression,
92             const PrefixResolver&       resolver,
93             const Locator*              locator = 0,
94             bool                        allowVariableReferences = true,
95             bool                        allowKeyFunction = true);
96 
97     virtual void
98     initMatchPattern(
99             XPath&                      pathObj,
100             XPathConstructionContext&   constructionContext,
101             const XalanDOMString&       expression,
102             const PrefixResolver&       resolver,
103             const Locator*              locator = 0,
104             bool                        allowVariableReferences = true,
105             bool                        allowKeyFunction = true);
106 
107 private:
108 
109     /**
110      * Walk through the expression and build a token queue, and a map of the
111      * top-level elements.
112      *
113      * @param pat XSLT Expression.
114      */
115     void
116     tokenize(const XalanDOMString&  pat);
117 
118     void
119     addToTokenQueue(const XalanDOMString&   s) const;
120 
121     void
122     replaceTokenWithNamespaceToken() const;
123 
124     /**
125      * When a separator token is found, see if there's an element name or the
126      * like to map.
127      */
128     t_size_type
129     mapNSTokens(
130             const XalanDOMString&   pat,
131             t_size_type             startSubstring,
132             t_size_type             posOfNSSep,
133             t_size_type             posOfScan);
134 
135     /**
136      * Check if m_token==s. If m_token is null, this won't throw
137      * an exception, instead it just returns false (or true
138      * if s is also null).
139      */
140     bool
141     tokenIs(const XalanDOMString&   s) const;
142 
143     /**
144      * Check if m_token==s. If m_token is null, this won't throw
145      * an exception, instead it just returns false (or true
146      * if s is also null).
147      */
148     bool
149     tokenIs(const XalanDOMChar*     s) const;
150 
151     /**
152      * Check if m_token==c. If m_token is null, this won't throw
153      * an exception, instead it just returns false (or true
154      * if c is also null).
155      */
156     bool
157     tokenIs(XalanDOMChar    c) const;
158 
159     /**
160      * Lookahead of the current token in order to
161      * make a branching decision.
162      * @param c the character to compare it to.
163      * @param n number of tokens to lookahead.  Must be
164      * greater than 1.
165      */
166     bool
167     lookahead(
168             XalanDOMChar    c,
169             int             n) const;
170 
171     /**
172      * Lookahead of the current token in order to
173      * make a branching decision.
174      * @param s the string to compare it to.
175      * @param n number of tokens to lookahead.  Must be
176      * greater than 1.
177      */
178     bool
179     lookahead(
180             const XalanDOMChar*     s,
181             int                     n) const;
182 
183     /**
184      * Lookahead of the current token in order to
185      * make a branching decision.
186      * @param s the string to compare it to.
187      * @param n number of tokens to lookahead.  Must be
188      * greater than 1.
189      */
190     bool
191     lookahead(
192             const XalanDOMString&   s,
193             int                     n) const;
194 
195     /**
196      * Lookbehind the first character of the current token in order to
197      * make a branching decision.
198      * @param c the character to compare it to.
199      * @param n number of tokens to lookbehind.  Must be
200      * greater than 1.  Note that the lookbehind terminates
201      * at either the beginning of the string or on a '|'
202      * character.  Because of this, this method should only
203      * be used for pattern matching.
204      */
205     bool
206     lookbehind(
207             char    c,
208             int     n) const;
209 
210     /**
211      * look behind the current token in order to
212      * see if there is a useable token.
213      * @param n number of tokens to lookbehind.  Must be
214      * greater than 1.  Note that the lookbehind terminates
215      * at either the beginning of the string or on a '|'
216      * character.  Because of this, this method should only
217      * be used for pattern matching.
218      * @return true if lookbehind has a token, false otherwise.
219      */
220     bool
221     lookbehindHasToken(int  n) const;
222 
223     /**
224      * Retrieve the next token from the command and
225      * store it in m_token string.
226      */
227     bool
228     nextToken();
229 
230     /**
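231      * Return a token selected by theOffset, relative to the current token
232      * position. This member is const and returns a reference, so unlike
     * nextToken() it does not store anything in m_token.
     * NOTE(review): the exact offset semantics are defined in the .cpp; confirm.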
233      */
234     const XalanDOMString&
235     getTokenRelative(int    theOffset) const;
236 
237     /**
238      * Retrieve the previous token from the command and
239      * store it in m_token string.
240      */
241     void
242     prevToken();
243 
244     /**
245      * Consume an expected token, throwing an exception if it
246      * isn't there.
247      */
248     void
249     consumeExpected(XalanDOMChar    expected);
250 
251     bool
252     isCurrentLiteral() const;
253 
254     /**
255      * Determine if the token is an axis
256      *
257      * @param theToken The token to test
258      * @return true if the token is a valid axis, false if not.
259      */
260     static bool
261     isAxis(const XalanDOMString&    theToken);
262 
263     /**
264      * Determine if the token could be a node test
265      *
266      * @param theToken The token to test
267      * @return true if the token is a valid node test, false if not.
268      */
269     static bool
270     isNodeTest(const XalanDOMString&    theToken);
271 
272     /**
273      * Throw an exception using the provided message text.
274      */
275     void
276     error(const XalanDOMString&     msg) const;
277 
278     /**
279      * Throw an exception using the provided message text.
280      */
281     void
282     error(XalanMessages::Codes  theCode) const;
283 
284     void
285     error(
286             XalanMessages::Codes    theCode,
287             const XalanDOMString&   theToken) const;
288 
289     void
290     error(
291             XalanMessages::Codes    theCode,
292             const XalanDOMChar*     theToken) const;
293 
294     void
295     error(
296             XalanMessages::Codes    theCode,
297             XalanDOMChar            theToken1,
298             const XalanDOMString&   theToken2) const;
299 
300     /**
301      * Given a string, return the corresponding token.
     *
     * Passes key, s_functionTable and s_functionTableSize to searchTable()
     * and returns the m_opCode member of the entry that call yields.
     * NOTE(review): the result for a key that is not in the table depends on
     * searchTable(), which is defined elsewhere -- presumably it yields
     * s_dummyEntry; confirm.
     *
     * @param key The string to look up in the function table.
     * @return The op code stored in the entry returned by searchTable().
302      */
303     static XPathExpression::eOpCodes
304     getFunctionToken(const XalanDOMString&  key)
305     {
306         return searchTable(s_functionTable, s_functionTableSize, key).m_opCode;
307     }
308 
309     /**
310      * Given a string, return the corresponding token.
     *
     * Passes key, s_nodeTypeTable and s_nodeTypeTableSize to searchTable()
     * and returns the m_opCode member of the entry that call yields.
     * NOTE(review): the result for a key that is not in the table depends on
     * searchTable(), which is defined elsewhere -- presumably it yields
     * s_dummyEntry; confirm.
     *
     * @param key The string to look up in the node type table.
     * @return The op code stored in the entry returned by searchTable().
311      */
312     static XPathExpression::eOpCodes
313     getNodeTypeToken(const XalanDOMString&  key)
314     {
315         return searchTable(s_nodeTypeTable, s_nodeTypeTableSize, key).m_opCode;
316     }
317 
318     /**
319      * Given a string, return the corresponding token.
     *
     * Passes key, s_axisTable and s_axisTableSize to searchTable() and
     * returns the m_opCode member of the entry that call yields.
     * NOTE(review): the result for a key that is not in the table depends on
     * searchTable(), which is defined elsewhere -- presumably it yields
     * s_dummyEntry; confirm.
     *
     * @param key The string to look up in the axis table.
     * @return The op code stored in the entry returned by searchTable().
320      */
321     static XPathExpression::eOpCodes
322     getAxisToken(const XalanDOMString&  key)
323     {
324         return searchTable(s_axisTable, s_axisTableSize, key).m_opCode;
325     }
326 
327     /**
328      *
329      * --------------------------------------------------------------------------------
330     Expr  ::=  OrExpr
331      * --------------------------------------------------------------------------------
332      */
333     void
334     Expr();
335 
336 
337     /**
338      *
339      * --------------------------------------------------------------------------------
340      OrExpr  ::=    AndExpr
341      | OrExpr 'or' AndExpr
342      * --------------------------------------------------------------------------------
343      */
344     void
345     OrExpr();
346 
347     /**
348      *
349      * --------------------------------------------------------------------------------
350      AndExpr    ::=  EqualityExpr
351      | AndExpr 'and' EqualityExpr
352      * --------------------------------------------------------------------------------
353      */
354     void
355     AndExpr() ;
356 
357     /**
358      * XXXX.
359      * @returns an Object which is either a String, a Number, a Boolean, or a vector
360      * of nodes.
361      * --------------------------------------------------------------------------------
362      EqualityExpr  ::=  RelationalExpr
363      | EqualityExpr '=' RelationalExpr
364      * --------------------------------------------------------------------------------
365      */
366     int
367     EqualityExpr(int    opCodePos = -1);
368 
369     /**
370      * XXXX.
371      * @returns an Object which is either a String, a Number, a Boolean, or a vector
372      * of nodes.
373      * --------------------------------------------------------------------------------
374      RelationalExpr  ::=    AdditiveExpr
375      | RelationalExpr '<' AdditiveExpr
376      | RelationalExpr '>' AdditiveExpr
377      | RelationalExpr '<=' AdditiveExpr
378      | RelationalExpr '>=' AdditiveExpr
379      * --------------------------------------------------------------------------------
380      */
381     int
382     RelationalExpr(int  opCodePos = -1);
383 
384     /**
385      * XXXX.
386      * @returns an Object which is either a String, a Number, a Boolean, or a vector
387      * of nodes.
388      * --------------------------------------------------------------------------------
389      AdditiveExpr  ::=  MultiplicativeExpr
390      | AdditiveExpr '+' MultiplicativeExpr
391      | AdditiveExpr '-' MultiplicativeExpr
392      * --------------------------------------------------------------------------------
393      */
394     int
395     AdditiveExpr(int    opCodePos = -1);
396 
397     /**
398      * XXXX.
399      * @returns an Object which is either a String, a Number, a Boolean, or a vector
400      * of nodes.
401      * --------------------------------------------------------------------------------
402      MultiplicativeExpr  ::=    UnaryExpr
403      | MultiplicativeExpr MultiplyOperator UnaryExpr
404      | MultiplicativeExpr 'div' UnaryExpr
405      | MultiplicativeExpr 'mod' UnaryExpr
406      | MultiplicativeExpr 'quo' UnaryExpr
407      * --------------------------------------------------------------------------------
408      */
409     int
410     MultiplicativeExpr(int  opCodePos = -1);
411 
412     /**
413      * XXXX.
414      * @returns an Object which is either a String, a Number, a Boolean, or a vector
415      * of nodes.
416      * --------------------------------------------------------------------------------
417      UnaryExpr  ::=  UnionExpr
418      | '-' UnaryExpr
419      * --------------------------------------------------------------------------------
420      */
421     void
422     UnaryExpr();
423 
424     /**
425      * The context of the right hand side expressions is the context of the
426      * left hand side expression. The results of the right hand side expressions
427      * are node sets. The result of the left hand side UnionExpr is the union
428      * of the results of the right hand side expressions.
429      *
430      * --------------------------------------------------------------------------------
431      UnionExpr  ::=    PathExpr
432      | UnionExpr '|' PathExpr
433      * --------------------------------------------------------------------------------
434      */
435     void
436     UnionExpr();
437 
438     /**
439      *
440      * --------------------------------------------------------------------------------
441      PathExpr  ::=  LocationPath
442      | FilterExpr
443      | FilterExpr '/' RelativeLocationPath
444      | FilterExpr '//' RelativeLocationPath
445      * --------------------------------------------------------------------------------
446      * @exception XSLProcessorException thrown if the active ProblemListener and XMLParserLiaison decide
447      * the error condition is severe enough to halt processing.
448      */
449     void
450     PathExpr();
451 
452     /**
453      *
454      * --------------------------------------------------------------------------------
455      FilterExpr  ::=    PrimaryExpr
456      | FilterExpr Predicate
457      * --------------------------------------------------------------------------------
458      * @exception XSLProcessorException thrown if the active ProblemListener and XMLParserLiaison decide
459      * the error condition is severe enough to halt processing.
460      */
461     void
462     FilterExpr();
463 
464     /**
465      * --------------------------------------------------------------------------------
466      PrimaryExpr    ::=  VariableReference
467      | '(' Expr ')'
468      | Literal
469      | Number
470      | FunctionCall
471      * --------------------------------------------------------------------------------
472      */
473     void
474     PrimaryExpr();
475 
476 
477     /**
478      * --------------------------------------------------------------------------------
479      Argument    ::=      Expr
480      * --------------------------------------------------------------------------------
481      */
482     void
483     Argument();
484 
485     /**
486      * --------------------------------------------------------------------------------
487      FunctionCall    ::=      FunctionName '(' ( Argument ( ',' Argument)*)? ')'
488      * --------------------------------------------------------------------------------
489      */
490     void
491     FunctionCall();
492 
493     void
494     FunctionPosition();
495 
496     void
497     FunctionLast();
498 
499     void
500     FunctionCount();
501 
502     void
503     FunctionNot();
504 
505     void
506     FunctionTrue();
507 
508     void
509     FunctionFalse();
510 
511     void
512     FunctionBoolean();
513 
514     void
515     FunctionName(int    opPos);
516 
517     void
518     FunctionLocalName(int   opPos);
519 
520     void
521     FunctionNumber(int  opPos);
522 
523     void
524     FunctionFloor();
525 
526     void
527     FunctionCeiling();
528 
529     void
530     FunctionRound();
531 
532     void
533     FunctionString(int  opPos);
534 
535     void
536     FunctionStringLength(int    opPos);
537 
538     void
539     FunctionSum();
540 
541     void
542     FunctionNamespaceURI(int    opPos);
543 
544     /**
545      * --------------------------------------------------------------------------------
546      LocationPath ::= RelativeLocationPath
547      | AbsoluteLocationPath
548      * --------------------------------------------------------------------------------
549      */
550     void
551     LocationPath();
552 
553     /**
554      * --------------------------------------------------------------------------------
555      RelativeLocationPath ::= Step
556      | RelativeLocationPath '/' Step
557      | AbbreviatedRelativeLocationPath
558      * --------------------------------------------------------------------------------
559      */
560     void
561     RelativeLocationPath();
562 
563     /**
564      * --------------------------------------------------------------------------------
565      Step    ::=      Basis Predicate*
566      | AbbreviatedStep
567      */
568     void
569     Step();
570 
571     /**
572      * --------------------------------------------------------------------------------
573      Basis  ::=    AxisName '::' NodeTest
574      | AbbreviatedBasis
575      */
576     void
577     Basis();
578 
579     /**
580      * --------------------------------------------------------------------------------
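581      AxisName  ::=    'ancestor' | 'ancestor-or-self' | 'attribute' | 'child'
582      | 'descendant' | 'descendant-or-self' | 'following' | 'following-sibling'
     | 'namespace' | 'parent' | 'preceding' | 'preceding-sibling' | 'self'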
583      */
584     XPathExpression::eOpCodes
585     AxisName();
586 
587     /**
588      * --------------------------------------------------------------------------------
589      NodeTest    ::=      WildcardName
590      | NodeType '(' ')'
591      | 'processing-instruction' '(' Literal ')'
592      */
593     int
594     NodeTest();
595 
596     /**
597      * --------------------------------------------------------------------------------
598      Predicate ::= '[' PredicateExpr ']'
599      * --------------------------------------------------------------------------------
600      */
601     void
602     Predicate();
603 
604     /**
605      *--------------------------------------------------------------------------------
606      PredicateExpr ::= Expr
607      *--------------------------------------------------------------------------------
608      */
609     void
610     PredicateExpr();
611 
612     /**
613      * QName ::=    (Prefix ':')? LocalPart
614      * Prefix ::=  NCName
615      * LocalPart ::=    NCName
616      */
617     void
618     QName();
619 
620     /**
621      * NCName ::=  (Letter | '_') (NCNameChar)*
622      * NCNameChar ::=  Letter | Digit | '.' | '-' | '_' | CombiningChar | Extender
623      */
624     void
625     NCName();
626 
627     /**
628      * The value of the Literal is the sequence of characters inside
629      * the " or ' characters.
630      * --------------------------------------------------------------------------------
631      Literal    ::=  '"' [^"]* '"'
632      | "'" [^']* "'"
633      * --------------------------------------------------------------------------------
634      */
635     void
636     Literal();
637 
638     /**
639      * --------------------------------------------------------------------------------
640      * Number ::= [0-9]+('.'[0-9]+)? | '.'[0-9]+
641      * --------------------------------------------------------------------------------
642      */
643     void
644     Number();
645 
646     /**
647      * --------------------------------------------------------------------------------
648      Pattern    ::=  LocationPathPattern
649      | Pattern '|' LocationPathPattern
650      * --------------------------------------------------------------------------------
651      */
652     void
653     Pattern();
654 
655     /**
656      *
657      * --------------------------------------------------------------------------------
658      LocationPathPattern    ::=  '/' RelativePathPattern?
659      | IdKeyPattern (('/' | '//') RelativePathPattern)?
660      | '//'? RelativePathPattern
661      * --------------------------------------------------------------------------------
662      */
663     void
664     LocationPathPattern();
665 
666     /**
667      * --------------------------------------------------------------------------------
668      IdKeyPattern  ::=  'id' '(' Literal ')'
669      | 'key' '(' Literal ',' Literal ')'
670      * (Also handle doc())
671      * --------------------------------------------------------------------------------
672      */
673     void
674     IdKeyPattern();
675 
676     /**
677      * --------------------------------------------------------------------------------
678      RelativePathPattern    ::=  StepPattern
679      | RelativePathPattern '/' StepPattern
680      | RelativePathPattern '//' StepPattern
681      * --------------------------------------------------------------------------------
682      */
683     void
684     RelativePathPattern();
685 
686     /**
687      * --------------------------------------------------------------------------------
688      StepPattern    ::=  AbbreviatedNodeTestStep
689      * --------------------------------------------------------------------------------
690      */
691     void
692     StepPattern();
693 
694     /**
695      * --------------------------------------------------------------------------------
696      AbbreviatedNodeTestStep      ::=    '@'? NodeTest Predicate*
697      * --------------------------------------------------------------------------------
698      */
699     void
700     AbbreviatedNodeTestStep();
701 
702     static bool
703     isValidFunction(const XalanDOMString&   key);
704 
705 private:
706 
707     int
708     FunctionCallArguments();
709 
    /**
     * One entry of a lookup table that pairs a string with an op code.
     * This is the element type of s_functionTable, s_nodeTypeTable and
     * s_axisTable, the type of s_dummyEntry, and the type searchTable()
     * accepts and returns.
     * NOTE(review): the tables are defined elsewhere; if they are
     * brace-initialized, the member order below is significant -- confirm
     * before reordering.
     */
710     struct TableEntry
711     {
        // The string half of the pair; presumably the text searchTable()
        // compares its argument against -- confirm in the implementation.
712         const XalanDOMChar*         m_string;
713 
        // The op code half of the pair; this is the value returned by
        // getFunctionToken(), getNodeTypeToken() and getAxisToken().
714         XPathExpression::eOpCodes   m_opCode;
715     };
716 
717     typedef std::size_t             size_type;
718 
719     static const TableEntry&
720     searchTable(
721         const TableEntry        theTable[],
722         size_type               theTableSize,
723         const XalanDOMString&   theString);
724 
725     /**
726      * The current input token.
727      */
728     XalanDOMString                  m_token;
729 
730     /**
731      * The first char in m_token, the theory being that this
732      * is an optimization because we won't have to do index
733      * into the string as often.
734      */
735     XalanDOMChar                    m_tokenChar;
736 
737     /**
738      * A pointer to the current XPath.
739      */
740     XPath*                          m_xpath;
741 
742     /**
743      * A pointer to the current XPathConstructionContext.
744      */
745     XPathConstructionContext*       m_constructionContext;
746 
747     /**
748      * A pointer to the current XPath's expression.
749      */
750     XPathExpression*                m_expression;
751 
752     /**
753      * A pointer to the current PrefixResolver.
754      */
755     const PrefixResolver*           m_prefixResolver;
756 
757     bool                            m_requireLiterals;
758 
759     bool                            m_isMatchPattern;
760 
761     const Locator*                  m_locator;
762 
763     BoolVectorType                  m_positionPredicateStack;
764 
765     StringToStringMapType           m_namespaces;
766 
767     bool                            m_allowVariableReferences;
768 
769     bool                            m_allowKeyFunction;
770 
771     // Static stuff here...
772     static const XalanDOMString     s_emptyString;
773 
774     static const XalanDOMChar       s_functionIDString[];
775 
776     // This shouldn't really be here, since it's not part of the XPath standard,
777     // but rather a part of the XSLT standard.
778     static const XalanDOMChar       s_functionKeyString[];
779 
780     static const XalanDOMChar       s_orString[];
781 
782     static const XalanDOMChar       s_andString[];
783 
784     static const XalanDOMChar       s_divString[];
785 
786     static const XalanDOMChar       s_modString[];
787 
788     static const XalanDOMChar       s_dotString[];
789 
790     static const XalanDOMChar       s_dotDotString[];
791 
792     static const XalanDOMChar       s_axisString[];
793 
794     static const XalanDOMChar       s_attributeString[];
795 
796     static const XalanDOMChar       s_childString[];
797 
798     static const XalanDOMChar       s_lastString[];
799 
800     static const XalanDOMChar       s_positionString[];
801 
802     static const XalanDOMChar       s_asteriskString[];
803 
804     static const XalanDOMChar       s_commentString[];
805 
806     static const XalanDOMChar       s_piString[];
807 
808     static const XalanDOMChar       s_nodeString[];
809 
810     static const XalanDOMChar       s_textString[];
811 
812     static const XalanDOMChar       s_ancestorString[];
813 
814     static const XalanDOMChar       s_ancestorOrSelfString[];
815 
816     static const XalanDOMChar       s_descendantString[];
817 
818     static const XalanDOMChar       s_descendantOrSelfString[];
819 
820     static const XalanDOMChar       s_followingString[];
821 
822     static const XalanDOMChar       s_followingSiblingString[];
823 
824     static const XalanDOMChar       s_parentString[];
825 
826     static const XalanDOMChar       s_precedingString[];
827 
828     static const XalanDOMChar       s_precedingSiblingString[];
829 
830     static const XalanDOMChar       s_selfString[];
831 
832     static const XalanDOMChar       s_namespaceString[];
833 
834     static const TableEntry         s_functionTable[];
835 
836     static const size_type          s_functionTableSize;
837 
838     static const TableEntry         s_nodeTypeTable[];
839 
840     static const size_type          s_nodeTypeTableSize;
841 
842     static const TableEntry         s_axisTable[];
843 
844     static const size_type          s_axisTableSize;
845 
846     static const TableEntry         s_dummyEntry;
847 };
848 
849 
850 
851 }
852 
853 
854 
855 #endif  // XPATHPROCESSORIMPL_HEADER_GUARD_1357924680
856