1 /*
2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
3  */
4 /*
5  * Licensed to the Apache Software Foundation (ASF) under one or more
6  * contributor license agreements.  See the NOTICE file distributed with
7  * this work for additional information regarding copyright ownership.
8  * The ASF licenses this file to You under the Apache License, Version 2.0
9  * (the "License"); you may not use this file except in compliance with
10  * the License.  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 package com.sun.org.apache.xerces.internal.impl.xpath;
22 
23 import com.sun.org.apache.xerces.internal.util.SymbolTable;
24 import com.sun.org.apache.xerces.internal.util.XMLChar;
25 import com.sun.org.apache.xerces.internal.util.XMLSymbols;
26 import com.sun.org.apache.xerces.internal.xni.NamespaceContext;
27 import com.sun.org.apache.xerces.internal.xni.QName;
28 import java.util.HashMap;
29 import java.util.Map;
30 import java.util.Vector;
31 
32 /**
33  * Bare minimum XPath parser.
34  *
35  * @xerces.internal
36  *
37  * @author Andy Clark, IBM
38  * @author Sunitha Reddy, Sun Microsystems
39  */
40 public class XPath {
41 
42     //
43     // Constants
44     //
45 
    /** Master debug switch; when true it turns on every debug flag derived from it below. */
    private static final boolean DEBUG_ALL = false;

    /** When true, the parsed location paths are printed at the end of expression parsing. */
    private static final boolean DEBUG_XPATH_PARSE = DEBUG_ALL || false;

    /** True when any debug flag is enabled; not referenced in the visible part of this file. */
    private static final boolean DEBUG_ANY = DEBUG_XPATH_PARSE;
51 
52     //
53     // Data
54     //
55 
    /** The XPath expression text handed to the constructor; it is the input of the parser. */
    protected String fExpression;

    /** Symbol table passed to the tokenizer and scanner, and used to intern prefixed raw names. */
    protected SymbolTable fSymbolTable;

    /** Parsed location paths, one per '|'-separated alternative; assigned once parsing succeeds. */
    protected LocationPath[] fLocationPaths;
64 
65     //
66     // Constructors
67     //
68 
69     /** Constructs an XPath object from the specified expression. */
XPath(String xpath, SymbolTable symbolTable, NamespaceContext context)70     public XPath(String xpath, SymbolTable symbolTable,
71                  NamespaceContext context)
72         throws XPathException {
73         fExpression = xpath;
74         fSymbolTable = symbolTable;
75         parseExpression(context);
76     } // <init>(String,SymbolTable,NamespaceContext)
77 
78     //
79     // Public methods
80     //
81 
82     /**
83      * Returns a representation of all location paths for this XPath.
84      * XPath = locationPath ( '|' locationPath)
85      */
getLocationPaths()86     public LocationPath[] getLocationPaths() {
87         LocationPath[] ret=new LocationPath[fLocationPaths.length];
88         for (int i=0;i<fLocationPaths.length;i++){
89             ret[i]=(LocationPath)fLocationPaths[i].clone();
90         }
91         return ret;
92     } // getLocationPath(LocationPath)
93 
94     /** Returns a representation of the first location path for this XPath. */
getLocationPath()95     public LocationPath getLocationPath() {
96         return (LocationPath)fLocationPaths[0].clone();
97     } // getLocationPath(LocationPath)
98 
99     //
100     // Object methods
101     //
102 
103     /** Returns a string representation of this object. */
toString()104     public String toString() {
105         StringBuffer buf=new StringBuffer();
106         for (int  i=0;i<fLocationPaths.length;i++){
107             if (i>0){
108                 buf.append("|");
109             }
110             buf.append(fLocationPaths[i].toString());
111         }
112         return buf.toString();
113     } // toString():String
114 
115     //
116     // Private methods
117     //
118 
119     /**
120      * Used by the {@link #parseExpression(NamespaceContext)} method
121      * to verify the assumption.
122      *
123      * If <tt>b</tt> is false, this method throws XPathException
124      * to report the error.
125      */
check( boolean b )126     private static void check( boolean b ) throws XPathException {
127         if(!b)      throw new XPathException("c-general-xpath");
128     }
129 
130     /**
131      * Used by the {@link #parseExpression(NamespaceContext)} method
132      * to build a {@link LocationPath} object from the accumulated
133      * {@link Step}s.
134      */
buildLocationPath( Vector stepsVector )135     private LocationPath buildLocationPath( Vector stepsVector ) throws XPathException {
136         int size = stepsVector.size();
137         check(size!=0);
138         Step[] steps = new Step[size];
139         stepsVector.copyInto(steps);
140         stepsVector.removeAllElements();
141 
142         return new LocationPath(steps);
143     }
144 
145     /**
146      * This method is implemented by using the XPathExprScanner and
147      * examining the list of tokens that it returns.
148      */
parseExpression(final NamespaceContext context)149     private void parseExpression(final NamespaceContext context)
150         throws XPathException {
151 
152         // tokens
153         final XPath.Tokens xtokens = new XPath.Tokens(fSymbolTable);
154 
155         // scanner
156         XPath.Scanner scanner = new XPath.Scanner(fSymbolTable) {
157             protected void addToken(XPath.Tokens tokens, int token)
158                 throws XPathException {
159                 if (
160                     token == XPath.Tokens.EXPRTOKEN_ATSIGN ||
161                     token == XPath.Tokens.EXPRTOKEN_AXISNAME_ATTRIBUTE ||
162                     token == XPath.Tokens.EXPRTOKEN_NAMETEST_QNAME ||
163                     token == XPath.Tokens.EXPRTOKEN_OPERATOR_SLASH ||
164                     token == XPath.Tokens.EXPRTOKEN_PERIOD ||
165                     token == XPath.Tokens.EXPRTOKEN_NAMETEST_ANY ||
166                     token == XPath.Tokens.EXPRTOKEN_NAMETEST_NAMESPACE ||
167                     token == XPath.Tokens.EXPRTOKEN_OPERATOR_DOUBLE_SLASH ||
168                     token == XPath.Tokens.EXPRTOKEN_OPERATOR_UNION ||
169                     token == XPath.Tokens.EXPRTOKEN_AXISNAME_CHILD ||
170                     token == XPath.Tokens.EXPRTOKEN_DOUBLE_COLON
171                     ) {
172                     super.addToken(tokens, token);
173                     return;
174                 }
175                 throw new XPathException("c-general-xpath");
176             }
177         };
178 
179         int length = fExpression.length();
180 
181         boolean success = scanner.scanExpr(fSymbolTable,
182                                            xtokens, fExpression, 0, length);
183         if(!success)
184             throw new XPathException("c-general-xpath");
185 
186         //fTokens.dumpTokens();
187         Vector stepsVector = new Vector();
188         Vector locationPathsVector= new Vector();
189 
190         // true when the next token should be 'Step' (as defined in
191         // the production rule [3] of XML Schema P1 section 3.11.6
192         // if false, we are expecting either '|' or '/'.
193         //
194         // this is to make sure we can detect a token list like
195         // 'abc' '/' '/' 'def' 'ghi'
196         boolean expectingStep = true;
197         boolean expectingDoubleColon = false;
198 
199         while(xtokens.hasMore()) {
200             final int token = xtokens.nextToken();
201 
202             switch (token) {
203                 case  XPath.Tokens.EXPRTOKEN_OPERATOR_UNION :{
204                     check(!expectingStep);
205                     locationPathsVector.addElement(buildLocationPath(stepsVector));
206                     expectingStep=true;
207                     break;
208                 }
209 
210                 case XPath.Tokens.EXPRTOKEN_ATSIGN: {
211                     check(expectingStep);
212                     Step step = new Step(
213                             new Axis(Axis.ATTRIBUTE),
214                             parseNodeTest(xtokens.nextToken(),xtokens,context));
215                     stepsVector.addElement(step);
216                     expectingStep=false;
217                     break;
218                 }
219                 case XPath.Tokens.EXPRTOKEN_NAMETEST_ANY:
220                 case XPath.Tokens.EXPRTOKEN_NAMETEST_NAMESPACE:
221                 case XPath.Tokens.EXPRTOKEN_NAMETEST_QNAME: {
222                     check(expectingStep);
223                     Step step = new Step(
224                             new Axis(Axis.CHILD),
225                             parseNodeTest(token,xtokens,context));
226                     stepsVector.addElement(step);
227                     expectingStep=false;
228                     break;
229                 }
230 
231                 case XPath.Tokens.EXPRTOKEN_PERIOD: {
232                     check(expectingStep);
233                     expectingStep=false;
234 
235                     // unless this is the first step in this location path,
236                     // there's really no reason to keep them in LocationPath.
237                     // This amounts to shorten "a/././b/./c" to "a/b/c".
238                     // Also, the matcher fails to work correctly if XPath
239                     // has those redundant dots.
240                     if (stepsVector.size()==0) {
241                         // build step
242                         Axis axis = new Axis(Axis.SELF);
243                         NodeTest nodeTest = new NodeTest(NodeTest.NODE);
244                         Step step = new Step(axis, nodeTest);
245                         stepsVector.addElement(step);
246 
247                         if( xtokens.hasMore()
248                          && xtokens.peekToken() == XPath.Tokens.EXPRTOKEN_OPERATOR_DOUBLE_SLASH){
249                             // consume '//'
250                             xtokens.nextToken();
251 
252                             // build step
253                             axis = new Axis(Axis.DESCENDANT);
254                             nodeTest = new NodeTest(NodeTest.NODE);
255                             step = new Step(axis, nodeTest);
256                             stepsVector.addElement(step);
257                             expectingStep=true;
258                         }
259                     }
260                     break;
261                 }
262 
263                 case XPath.Tokens.EXPRTOKEN_OPERATOR_DOUBLE_SLASH:{
264                     // this cannot appear in arbitrary position.
265                     // it is only allowed right after '.' when
266                     // '.' is the first token of a location path.
267                     throw new XPathException("c-general-xpath");
268                 }
269                 case XPath.Tokens.EXPRTOKEN_OPERATOR_SLASH: {
270                     check(!expectingStep);
271                     expectingStep=true;
272                     break;
273                 }
274                 case XPath.Tokens.EXPRTOKEN_AXISNAME_ATTRIBUTE: {
275                      check(expectingStep);
276                      expectingDoubleColon = true;
277 
278                      if (xtokens.nextToken() == XPath.Tokens.EXPRTOKEN_DOUBLE_COLON){
279                          Step step = new Step(
280                          new Axis(Axis.ATTRIBUTE),
281                                  parseNodeTest(xtokens.nextToken(),xtokens,context));
282                          stepsVector.addElement(step);
283                          expectingStep=false;
284                          expectingDoubleColon = false;
285                      }
286                      break;
287                 }
288                 case XPath.Tokens.EXPRTOKEN_AXISNAME_CHILD:{
289                     check(expectingStep);
290                     expectingDoubleColon = true;
291                     break;
292                 }
293                 case XPath.Tokens.EXPRTOKEN_DOUBLE_COLON :{
294                     check(expectingStep);
295                     check(expectingDoubleColon);
296                     expectingDoubleColon = false;
297                     break;
298                 }
299                 default:
300                     // we should have covered all the tokens that we can possibly see.
301                     throw new XPathException("c-general-xpath");
302            }
303         }
304 
305         check(!expectingStep);
306 
307         locationPathsVector.addElement(buildLocationPath(stepsVector));
308 
309         // save location path
310         fLocationPaths=new LocationPath[locationPathsVector.size()];
311         locationPathsVector.copyInto(fLocationPaths);
312 
313 
314         if (DEBUG_XPATH_PARSE) {
315             System.out.println(">>> "+fLocationPaths);
316         }
317 
318     } // parseExpression(SymbolTable,NamespaceContext)
319 
320     /**
321      * Used by {@link #parseExpression} to parse a node test
322      * from the token list.
323      */
parseNodeTest( int typeToken, Tokens xtokens, NamespaceContext context )324     private NodeTest parseNodeTest( int typeToken, Tokens xtokens, NamespaceContext context )
325         throws XPathException {
326         switch(typeToken) {
327         case XPath.Tokens.EXPRTOKEN_NAMETEST_ANY:
328             return new NodeTest(NodeTest.WILDCARD);
329 
330         case XPath.Tokens.EXPRTOKEN_NAMETEST_NAMESPACE:
331         case XPath.Tokens.EXPRTOKEN_NAMETEST_QNAME:
332             // consume QName token
333             String prefix = xtokens.nextTokenAsString();
334             String uri = null;
335             if (context != null && prefix != XMLSymbols.EMPTY_STRING) {
336                 uri = context.getURI(prefix);
337             }
338             if (prefix != XMLSymbols.EMPTY_STRING && context != null && uri == null) {
339                 throw new XPathException("c-general-xpath-ns");
340             }
341 
342             if (typeToken==XPath.Tokens.EXPRTOKEN_NAMETEST_NAMESPACE)
343                 return new NodeTest(prefix,uri);
344 
345             String localpart = xtokens.nextTokenAsString();
346             String rawname = prefix != XMLSymbols.EMPTY_STRING
347             ? fSymbolTable.addSymbol(prefix+':'+localpart)
348             : localpart;
349 
350             return new NodeTest(new QName(prefix, localpart, rawname, uri));
351 
352         default:
353             // assertion error
354             throw new XPathException("c-general-xpath");
355 
356         }
357     }
358 
359 
360     //
361     // Classes
362     //
363 
364     // location path information
365 
366     /**
367      * A location path representation for an XPath expression.
368      *
369      * @xerces.internal
370      *
371      * @author Andy Clark, IBM
372      */
373     public static class LocationPath
374         implements Cloneable {
375 
376         //
377         // Data
378         //
379 
380         /** List of steps. */
381         public Step[] steps;
382 
383         //
384         // Constructors
385         //
386 
387         /** Creates a location path from a series of steps. */
LocationPath(Step[] steps)388         public LocationPath(Step[] steps) {
389             this.steps = steps;
390         } // <init>(Step[])
391 
392         /** Copy constructor. */
LocationPath(LocationPath path)393         protected LocationPath(LocationPath path) {
394             steps = new Step[path.steps.length];
395             for (int i = 0; i < steps.length; i++) {
396                 steps[i] = (Step)path.steps[i].clone();
397             }
398         } // <init>(LocationPath)
399 
400         //
401         // Object methods
402         //
403 
404         /** Returns a string representation of this object. */
toString()405         public String toString() {
406             StringBuffer str = new StringBuffer();
407             for (int i = 0; i < steps.length; i++) {
408                 if (i > 0       && (steps[i-1].axis.type!=Axis.DESCENDANT
409                     && steps[i].axis.type!=Axis.DESCENDANT) ){
410                     str.append('/');
411                 }
412                 str.append(steps[i].toString());
413             }
414             // DEBUG: This code is just for debugging and should *not*
415             //        be left in because it will mess up hashcodes of
416             //        serialized versions of this object. -Ac
417             if (false) {
418                 str.append('[');
419                 String s = super.toString();
420                 str.append(s.substring(s.indexOf('@')));
421                 str.append(']');
422             }
423             return str.toString();
424         } // toString():String
425 
426         /** Returns a clone of this object. */
clone()427         public Object clone() {
428             return new LocationPath(this);
429         } // clone():Object
430 
431     } // class locationPath
432 
433     /**
434      * A location path step comprised of an axis and node test.
435      *
436      * @xerces.internal
437      *
438      * @author Andy Clark, IBM
439      */
440     public static class Step
441         implements Cloneable {
442 
443         //
444         // Data
445         //
446 
447         /** Axis. */
448         public Axis axis;
449 
450         /** Node test. */
451         public NodeTest nodeTest;
452 
453         //
454         // Constructors
455         //
456 
457         /** Constructs a step from an axis and node test. */
Step(Axis axis, NodeTest nodeTest)458         public Step(Axis axis, NodeTest nodeTest) {
459             this.axis = axis;
460             this.nodeTest = nodeTest;
461         } // <init>(Axis,NodeTest)
462 
463         /** Copy constructor. */
Step(Step step)464         protected Step(Step step) {
465             axis = (Axis)step.axis.clone();
466             nodeTest = (NodeTest)step.nodeTest.clone();
467         } // <init>(Step)
468 
469         //
470         // Object methods
471         //
472 
473         /** Returns a string representation of this object. */
toString()474         public String toString() {
475             if (axis.type == Axis.SELF) {
476                 return ".";
477             }
478             if (axis.type == Axis.ATTRIBUTE) {
479                 return "@" + nodeTest.toString();
480             }
481             if (axis.type == Axis.CHILD) {
482                 return nodeTest.toString();
483             }
484             if (axis.type == Axis.DESCENDANT) {
485                 return "//";
486             }
487             return "??? ("+axis.type+')';
488         } // toString():String
489 
490         /** Returns a clone of this object. */
clone()491         public Object clone() {
492             return new Step(this);
493         } // clone():Object
494 
495     } // class Step
496 
497     /**
498      * Axis.
499      *
500      * @xerces.internal
501      *
502      * @author Andy Clark, IBM
503      */
504     public static class Axis
505         implements Cloneable {
506 
507         //
508         // Constants
509         //
510 
511         /** Type: child. */
512         public static final short CHILD = 1;
513 
514         /** Type: attribute. */
515         public static final short ATTRIBUTE = 2;
516 
517         /** Type: self. */
518         public static final short SELF = 3;
519 
520 
521         /** Type: descendant. */
522         public static final short DESCENDANT = 4;
523         //
524         // Data
525         //
526 
527         /** Axis type. */
528         public short type;
529 
530         //
531         // Constructors
532         //
533 
534         /** Constructs an axis with the specified type. */
Axis(short type)535         public Axis(short type) {
536             this.type = type;
537         } // <init>(short)
538 
539         /** Copy constructor. */
Axis(Axis axis)540         protected Axis(Axis axis) {
541             type = axis.type;
542         } // <init>(Axis)
543 
544         //
545         // Object methods
546         //
547 
548         /** Returns a string representation of this object. */
toString()549         public String toString() {
550             switch (type) {
551                 case CHILD: return "child";
552                 case ATTRIBUTE: return "attribute";
553                 case SELF: return "self";
554                 case DESCENDANT: return "descendant";
555             }
556             return "???";
557         } // toString():String
558 
559         /** Returns a clone of this object. */
clone()560         public Object clone() {
561             return new Axis(this);
562         } // clone():Object
563 
564     } // class Axis
565 
566     /**
567      * Node test.
568      *
569      * @xerces.internal
570      *
571      * @author Andy Clark, IBM
572      */
573     public static class NodeTest
574         implements Cloneable {
575 
576         //
577         // Constants
578         //
579 
580         /** Type: qualified name. */
581         public static final short QNAME = 1;
582 
583         /** Type: wildcard. */
584         public static final short WILDCARD = 2;
585 
586         /** Type: node. */
587         public static final short NODE = 3;
588 
589         /** Type: namespace */
590         public static final short NAMESPACE= 4;
591 
592         //
593         // Data
594         //
595 
596         /** Node test type. */
597         public short type;
598 
599         /** Node qualified name. */
600         public final QName name = new QName();
601 
602         //
603         // Constructors
604         //
605 
606         /** Constructs a node test of type WILDCARD or NODE. */
NodeTest(short type)607         public NodeTest(short type) {
608             this.type = type;
609         } // <init>(int)
610 
611         /** Constructs a node test of type QName. */
NodeTest(QName name)612         public NodeTest(QName name) {
613             this.type = QNAME;
614             this.name.setValues(name);
615         } // <init>(QName)
616         /** Constructs a node test of type Namespace. */
NodeTest(String prefix, String uri)617         public NodeTest(String prefix, String uri) {
618             this.type = NAMESPACE;
619             this.name.setValues(prefix, null, null, uri);
620         } // <init>(String,String)
621 
622         /** Copy constructor. */
NodeTest(NodeTest nodeTest)623         public NodeTest(NodeTest nodeTest) {
624             type = nodeTest.type;
625             name.setValues(nodeTest.name);
626         } // <init>(NodeTest)
627 
628         //
629         // Object methods
630         //
631 
632         /** Returns a string representation of this object. */
toString()633         public String toString() {
634 
635             switch (type) {
636                 case QNAME: {
637                     if (name.prefix.length() !=0) {
638                         if (name.uri != null) {
639                             return name.prefix+':'+name.localpart;
640                         }
641                         return "{"+name.uri+'}'+name.prefix+':'+name.localpart;
642                     }
643                     return name.localpart;
644                 }
645                 case NAMESPACE: {
646                     if (name.prefix.length() !=0) {
647                         if (name.uri != null) {
648                             return name.prefix+":*";
649                         }
650                         return "{"+name.uri+'}'+name.prefix+":*";
651                     }
652                     return "???:*";
653                 }
654                 case WILDCARD: {
655                     return "*";
656                 }
657                 case NODE: {
658                     return "node()";
659                 }
660             }
661             return "???";
662 
663         } // toString():String
664 
665         /** Returns a clone of this object. */
clone()666         public Object clone() {
667             return new NodeTest(this);
668         } // clone():Object
669 
670     } // class NodeTest
671 
672     // xpath implementation
673 
674     // NOTE: The XPath implementation classes are kept internal because
675     //       this implementation is just a temporary hack until a better
676     //       and/or more appropriate implementation can be written.
677     //       keeping the code in separate source files would "muddy" the
678     //       CVS directory when it's not needed. -Ac
679 
680     /**
681      * List of tokens.
682      *
683      * @xerces.internal
684      *
685      * @author Glenn Marcy, IBM
686      * @author Andy Clark, IBM
687      *
688      */
689     private static final class Tokens {
690 
691         static final boolean DUMP_TOKENS = false;
692 
693         /**
694          * [28] ExprToken ::= '(' | ')' | '[' | ']' | '.' | '..' | '@' | ',' | '::'
695          *                  | NameTest | NodeType | Operator | FunctionName
696          *                  | AxisName | Literal | Number | VariableReference
697          */
698         public static final int
699             EXPRTOKEN_OPEN_PAREN                    =   0,
700             EXPRTOKEN_CLOSE_PAREN                   =   1,
701             EXPRTOKEN_OPEN_BRACKET                  =   2,
702             EXPRTOKEN_CLOSE_BRACKET                 =   3,
703             EXPRTOKEN_PERIOD                        =   4,
704             EXPRTOKEN_DOUBLE_PERIOD                 =   5,
705             EXPRTOKEN_ATSIGN                        =   6,
706             EXPRTOKEN_COMMA                         =   7,
707             EXPRTOKEN_DOUBLE_COLON                  =   8,
708             //
709             // [37] NameTest ::= '*' | NCName ':' '*' | QName
710             //
711             // followed by symbol handle of NCName or QName
712             //
713             EXPRTOKEN_NAMETEST_ANY                  =   9,
714             EXPRTOKEN_NAMETEST_NAMESPACE            =  10,
715             EXPRTOKEN_NAMETEST_QNAME                =  11,
716             //
717             // [38] NodeType ::= 'comment' | 'text' | 'processing-instruction' | 'node'
718             //
719             EXPRTOKEN_NODETYPE_COMMENT              =  12,
720             EXPRTOKEN_NODETYPE_TEXT                 =  13,
721             EXPRTOKEN_NODETYPE_PI                   =  14,
722             EXPRTOKEN_NODETYPE_NODE                 =  15,
723             //
724             // [32] Operator ::= OperatorName
725             //                 | MultiplyOperator
726             //                 | '/' | '//' | '|' | '+' | '-' | '=' | '!=' | '<' | '<=' | '>' | '>='
727             // [33] OperatorName ::= 'and' | 'or' | 'mod' | 'div'
728             // [34] MultiplyOperator ::= '*'
729             //
730             EXPRTOKEN_OPERATOR_AND                  =  16,
731             EXPRTOKEN_OPERATOR_OR                   =  17,
732             EXPRTOKEN_OPERATOR_MOD                  =  18,
733             EXPRTOKEN_OPERATOR_DIV                  =  19,
734             EXPRTOKEN_OPERATOR_MULT                 =  20,
735             EXPRTOKEN_OPERATOR_SLASH                =  21,
736             EXPRTOKEN_OPERATOR_DOUBLE_SLASH         =  22,
737             EXPRTOKEN_OPERATOR_UNION                =  23,
738             EXPRTOKEN_OPERATOR_PLUS                 =  24,
739             EXPRTOKEN_OPERATOR_MINUS                =  25,
740             EXPRTOKEN_OPERATOR_EQUAL                =  26,
741             EXPRTOKEN_OPERATOR_NOT_EQUAL            =  27,
742             EXPRTOKEN_OPERATOR_LESS                 =  28,
743             EXPRTOKEN_OPERATOR_LESS_EQUAL           =  29,
744             EXPRTOKEN_OPERATOR_GREATER              =  30,
745             EXPRTOKEN_OPERATOR_GREATER_EQUAL        =  31,
746 
747             //EXPRTOKEN_FIRST_OPERATOR                = EXPRTOKEN_OPERATOR_AND,
748             //EXPRTOKEN_LAST_OPERATOR                 = EXPRTOKEN_OPERATOR_GREATER_EQUAL,
749 
750             //
751             // [35] FunctionName ::= QName - NodeType
752             //
753             // followed by symbol handle
754             //
755             EXPRTOKEN_FUNCTION_NAME                 =  32,
756             //
757             // [6] AxisName ::= 'ancestor' | 'ancestor-or-self'
758             //                | 'attribute'
759             //                | 'child'
760             //                | 'descendant' | 'descendant-or-self'
761             //                | 'following' | 'following-sibling'
762             //                | 'namespace'
763             //                | 'parent'
764             //                | 'preceding' | 'preceding-sibling'
765             //                | 'self'
766             //
767             EXPRTOKEN_AXISNAME_ANCESTOR             =  33,
768             EXPRTOKEN_AXISNAME_ANCESTOR_OR_SELF     =  34,
769             EXPRTOKEN_AXISNAME_ATTRIBUTE            =  35,
770             EXPRTOKEN_AXISNAME_CHILD                =  36,
771             EXPRTOKEN_AXISNAME_DESCENDANT           =  37,
772             EXPRTOKEN_AXISNAME_DESCENDANT_OR_SELF   =  38,
773             EXPRTOKEN_AXISNAME_FOLLOWING            =  39,
774             EXPRTOKEN_AXISNAME_FOLLOWING_SIBLING    =  40,
775             EXPRTOKEN_AXISNAME_NAMESPACE            =  41,
776             EXPRTOKEN_AXISNAME_PARENT               =  42,
777             EXPRTOKEN_AXISNAME_PRECEDING            =  43,
778             EXPRTOKEN_AXISNAME_PRECEDING_SIBLING    =  44,
779             EXPRTOKEN_AXISNAME_SELF                 =  45,
780             //
781             // [29] Literal ::= '"' [^"]* '"' | "'" [^']* "'"
782             //
783             // followed by symbol handle for literal
784             //
785             EXPRTOKEN_LITERAL                       =  46,
786             //
787             // [30] Number ::= Digits ('.' Digits?)? | '.' Digits
788             // [31] Digits ::= [0-9]+
789             //
790             // followed by number handle
791             //
792             EXPRTOKEN_NUMBER                        =  47,
793             //
794             // [36] VariableReference ::= '$' QName
795             //
796             // followed by symbol handle for QName
797             //
798             EXPRTOKEN_VARIABLE_REFERENCE            =  48;
799 
        /**
         * Display names of the expression tokens, one per
         * {@code EXPRTOKEN_*} constant, listed in the same order in which
         * the constructor registers the names in {@code fTokenNames}.
         *
         * NOTE(review): the entries appear to be indexed by token value (the
         * last two line up with EXPRTOKEN_NUMBER = 47 and
         * EXPRTOKEN_VARIABLE_REFERENCE = 48) -- confirm for the remaining
         * constants.  The only reference to this array in the nearby code is
         * the commented-out getTokenName(int) method.
         */
        private static final String[] fgTokenNames = {
            "EXPRTOKEN_OPEN_PAREN",
            "EXPRTOKEN_CLOSE_PAREN",
            "EXPRTOKEN_OPEN_BRACKET",
            "EXPRTOKEN_CLOSE_BRACKET",
            "EXPRTOKEN_PERIOD",
            "EXPRTOKEN_DOUBLE_PERIOD",
            "EXPRTOKEN_ATSIGN",
            "EXPRTOKEN_COMMA",
            "EXPRTOKEN_DOUBLE_COLON",
            "EXPRTOKEN_NAMETEST_ANY",
            "EXPRTOKEN_NAMETEST_NAMESPACE",
            "EXPRTOKEN_NAMETEST_QNAME",
            "EXPRTOKEN_NODETYPE_COMMENT",
            "EXPRTOKEN_NODETYPE_TEXT",
            "EXPRTOKEN_NODETYPE_PI",
            "EXPRTOKEN_NODETYPE_NODE",
            "EXPRTOKEN_OPERATOR_AND",
            "EXPRTOKEN_OPERATOR_OR",
            "EXPRTOKEN_OPERATOR_MOD",
            "EXPRTOKEN_OPERATOR_DIV",
            "EXPRTOKEN_OPERATOR_MULT",
            "EXPRTOKEN_OPERATOR_SLASH",
            "EXPRTOKEN_OPERATOR_DOUBLE_SLASH",
            "EXPRTOKEN_OPERATOR_UNION",
            "EXPRTOKEN_OPERATOR_PLUS",
            "EXPRTOKEN_OPERATOR_MINUS",
            "EXPRTOKEN_OPERATOR_EQUAL",
            "EXPRTOKEN_OPERATOR_NOT_EQUAL",
            "EXPRTOKEN_OPERATOR_LESS",
            "EXPRTOKEN_OPERATOR_LESS_EQUAL",
            "EXPRTOKEN_OPERATOR_GREATER",
            "EXPRTOKEN_OPERATOR_GREATER_EQUAL",
            "EXPRTOKEN_FUNCTION_NAME",
            "EXPRTOKEN_AXISNAME_ANCESTOR",
            "EXPRTOKEN_AXISNAME_ANCESTOR_OR_SELF",
            "EXPRTOKEN_AXISNAME_ATTRIBUTE",
            "EXPRTOKEN_AXISNAME_CHILD",
            "EXPRTOKEN_AXISNAME_DESCENDANT",
            "EXPRTOKEN_AXISNAME_DESCENDANT_OR_SELF",
            "EXPRTOKEN_AXISNAME_FOLLOWING",
            "EXPRTOKEN_AXISNAME_FOLLOWING_SIBLING",
            "EXPRTOKEN_AXISNAME_NAMESPACE",
            "EXPRTOKEN_AXISNAME_PARENT",
            "EXPRTOKEN_AXISNAME_PRECEDING",
            "EXPRTOKEN_AXISNAME_PRECEDING_SIBLING",
            "EXPRTOKEN_AXISNAME_SELF",
            "EXPRTOKEN_LITERAL",
            "EXPRTOKEN_NUMBER",
            "EXPRTOKEN_VARIABLE_REFERENCE"
        };
851 
        /**
         * Initial capacity of the token buffer (1 &lt;&lt; 8 = 256 entries).
         */
        private static final int INITIAL_TOKEN_COUNT = 1 << 8;
        /** Token buffer; addToken(int) replaces it with one twice as large when it is full. */
        private int[] fTokens = new int[INITIAL_TOKEN_COUNT];
        /** Number of tokens written so far, which is also the next write position. */
        private int fTokenCount = 0;    // for writing

        /** Symbol table handed to the constructor. */
        private SymbolTable fSymbolTable;

        // Maps each axis-name symbol (as returned by the symbol table) to its
        // position in the axis-name list built by the constructor.
        // REVISIT: Code something better here. -Ac
        private Map<String, Integer> fSymbolMapping = new HashMap<>();

        // Maps token values to strings: the EXPRTOKEN_* names registered by the
        // constructor plus every string registered through addToken(String).
        // REVISIT: Code something better here. -Ac
        private Map<Integer, String> fTokenNames = new HashMap<>();

        /**
         * Current read position in the token list; reset by rewind() and
         * advanced by nextToken().
         */
        private int fCurrentTokenIndex;
871 
872         //
873         // Constructors
874         //
875 
Tokens(SymbolTable symbolTable)876         public Tokens(SymbolTable symbolTable) {
877             fSymbolTable = symbolTable;
878             final String[] symbols = {
879                 "ancestor",     "ancestor-or-self",     "attribute",
880                 "child",        "descendant",           "descendant-or-self",
881                 "following",    "following-sibling",    "namespace",
882                 "parent",       "preceding",            "preceding-sibling",
883                 "self",
884             };
885             for (int i = 0; i < symbols.length; i++) {
886                 fSymbolMapping.put(fSymbolTable.addSymbol(symbols[i]), i);
887             }
888             fTokenNames.put(EXPRTOKEN_OPEN_PAREN, "EXPRTOKEN_OPEN_PAREN");
889             fTokenNames.put(EXPRTOKEN_CLOSE_PAREN, "EXPRTOKEN_CLOSE_PAREN");
890             fTokenNames.put(EXPRTOKEN_OPEN_BRACKET, "EXPRTOKEN_OPEN_BRACKET");
891             fTokenNames.put(EXPRTOKEN_CLOSE_BRACKET, "EXPRTOKEN_CLOSE_BRACKET");
892             fTokenNames.put(EXPRTOKEN_PERIOD, "EXPRTOKEN_PERIOD");
893             fTokenNames.put(EXPRTOKEN_DOUBLE_PERIOD, "EXPRTOKEN_DOUBLE_PERIOD");
894             fTokenNames.put(EXPRTOKEN_ATSIGN, "EXPRTOKEN_ATSIGN");
895             fTokenNames.put(EXPRTOKEN_COMMA, "EXPRTOKEN_COMMA");
896             fTokenNames.put(EXPRTOKEN_DOUBLE_COLON, "EXPRTOKEN_DOUBLE_COLON");
897             fTokenNames.put(EXPRTOKEN_NAMETEST_ANY, "EXPRTOKEN_NAMETEST_ANY");
898             fTokenNames.put(EXPRTOKEN_NAMETEST_NAMESPACE, "EXPRTOKEN_NAMETEST_NAMESPACE");
899             fTokenNames.put(EXPRTOKEN_NAMETEST_QNAME, "EXPRTOKEN_NAMETEST_QNAME");
900             fTokenNames.put(EXPRTOKEN_NODETYPE_COMMENT, "EXPRTOKEN_NODETYPE_COMMENT");
901             fTokenNames.put(EXPRTOKEN_NODETYPE_TEXT, "EXPRTOKEN_NODETYPE_TEXT");
902             fTokenNames.put(EXPRTOKEN_NODETYPE_PI, "EXPRTOKEN_NODETYPE_PI");
903             fTokenNames.put(EXPRTOKEN_NODETYPE_NODE, "EXPRTOKEN_NODETYPE_NODE");
904             fTokenNames.put(EXPRTOKEN_OPERATOR_AND, "EXPRTOKEN_OPERATOR_AND");
905             fTokenNames.put(EXPRTOKEN_OPERATOR_OR, "EXPRTOKEN_OPERATOR_OR");
906             fTokenNames.put(EXPRTOKEN_OPERATOR_MOD, "EXPRTOKEN_OPERATOR_MOD");
907             fTokenNames.put(EXPRTOKEN_OPERATOR_DIV, "EXPRTOKEN_OPERATOR_DIV");
908             fTokenNames.put(EXPRTOKEN_OPERATOR_MULT, "EXPRTOKEN_OPERATOR_MULT");
909             fTokenNames.put(EXPRTOKEN_OPERATOR_SLASH, "EXPRTOKEN_OPERATOR_SLASH");
910             fTokenNames.put(EXPRTOKEN_OPERATOR_DOUBLE_SLASH, "EXPRTOKEN_OPERATOR_DOUBLE_SLASH");
911             fTokenNames.put(EXPRTOKEN_OPERATOR_UNION, "EXPRTOKEN_OPERATOR_UNION");
912             fTokenNames.put(EXPRTOKEN_OPERATOR_PLUS, "EXPRTOKEN_OPERATOR_PLUS");
913             fTokenNames.put(EXPRTOKEN_OPERATOR_MINUS, "EXPRTOKEN_OPERATOR_MINUS");
914             fTokenNames.put(EXPRTOKEN_OPERATOR_EQUAL, "EXPRTOKEN_OPERATOR_EQUAL");
915             fTokenNames.put(EXPRTOKEN_OPERATOR_NOT_EQUAL, "EXPRTOKEN_OPERATOR_NOT_EQUAL");
916             fTokenNames.put(EXPRTOKEN_OPERATOR_LESS, "EXPRTOKEN_OPERATOR_LESS");
917             fTokenNames.put(EXPRTOKEN_OPERATOR_LESS_EQUAL, "EXPRTOKEN_OPERATOR_LESS_EQUAL");
918             fTokenNames.put(EXPRTOKEN_OPERATOR_GREATER, "EXPRTOKEN_OPERATOR_GREATER");
919             fTokenNames.put(EXPRTOKEN_OPERATOR_GREATER_EQUAL, "EXPRTOKEN_OPERATOR_GREATER_EQUAL");
920             fTokenNames.put(EXPRTOKEN_FUNCTION_NAME, "EXPRTOKEN_FUNCTION_NAME");
921             fTokenNames.put(EXPRTOKEN_AXISNAME_ANCESTOR, "EXPRTOKEN_AXISNAME_ANCESTOR");
922             fTokenNames.put(EXPRTOKEN_AXISNAME_ANCESTOR_OR_SELF, "EXPRTOKEN_AXISNAME_ANCESTOR_OR_SELF");
923             fTokenNames.put(EXPRTOKEN_AXISNAME_ATTRIBUTE, "EXPRTOKEN_AXISNAME_ATTRIBUTE");
924             fTokenNames.put(EXPRTOKEN_AXISNAME_CHILD, "EXPRTOKEN_AXISNAME_CHILD");
925             fTokenNames.put(EXPRTOKEN_AXISNAME_DESCENDANT, "EXPRTOKEN_AXISNAME_DESCENDANT");
926             fTokenNames.put(EXPRTOKEN_AXISNAME_DESCENDANT_OR_SELF, "EXPRTOKEN_AXISNAME_DESCENDANT_OR_SELF");
927             fTokenNames.put(EXPRTOKEN_AXISNAME_FOLLOWING, "EXPRTOKEN_AXISNAME_FOLLOWING");
928             fTokenNames.put(EXPRTOKEN_AXISNAME_FOLLOWING_SIBLING, "EXPRTOKEN_AXISNAME_FOLLOWING_SIBLING");
929             fTokenNames.put(EXPRTOKEN_AXISNAME_NAMESPACE, "EXPRTOKEN_AXISNAME_NAMESPACE");
930             fTokenNames.put(EXPRTOKEN_AXISNAME_PARENT, "EXPRTOKEN_AXISNAME_PARENT");
931             fTokenNames.put(EXPRTOKEN_AXISNAME_PRECEDING, "EXPRTOKEN_AXISNAME_PRECEDING");
932             fTokenNames.put(EXPRTOKEN_AXISNAME_PRECEDING_SIBLING, "EXPRTOKEN_AXISNAME_PRECEDING_SIBLING");
933             fTokenNames.put(EXPRTOKEN_AXISNAME_SELF, "EXPRTOKEN_AXISNAME_SELF");
934             fTokenNames.put(EXPRTOKEN_LITERAL, "EXPRTOKEN_LITERAL");
935             fTokenNames.put(EXPRTOKEN_NUMBER, "EXPRTOKEN_NUMBER");
936             fTokenNames.put(EXPRTOKEN_VARIABLE_REFERENCE, "EXPRTOKEN_VARIABLE_REFERENCE");
937         }
938 
939         //
940         // Public methods
941         //
942 
943 //        public String getTokenName(int token) {
944 //            if (token < 0 || token >= fgTokenNames.length)
945 //                return null;
946 //            return fgTokenNames[token];
947 //        }
948 //
getTokenString(int token)949         public String getTokenString(int token) {
950             return fTokenNames.get(token);
951         }
952 
addToken(String tokenStr)953         public void addToken(String tokenStr) {
954             Integer tokenInt = null;
955             for (Map.Entry<Integer, String> entry : fTokenNames.entrySet()) {
956                 if (entry.getValue().equals(tokenStr)) {
957                     tokenInt = entry.getKey();
958                 }
959             }
960             if (tokenInt == null) {
961                 tokenInt = fTokenNames.size();
962                 fTokenNames.put(tokenInt, tokenStr);
963             }
964             addToken(tokenInt);
965         }
966 
addToken(int token)967         public void addToken(int token) {
968             try {
969                 fTokens[fTokenCount] = token;
970             } catch (ArrayIndexOutOfBoundsException ex) {
971                 int[] oldList = fTokens;
972                 fTokens = new int[fTokenCount << 1];
973                 System.arraycopy(oldList, 0, fTokens, 0, fTokenCount);
974                 fTokens[fTokenCount] = token;
975             }
976             fTokenCount++;
977         }
978 //        public int getTokenCount() {
979 //            return fTokenCount;
980 //        }
981 //        public int getToken(int tokenIndex) {
982 //            return fTokens[tokenIndex];
983 //        }
984 
985         /**
986          * Resets the current position to the head of the token list.
987          */
rewind()988         public void rewind() {
989             fCurrentTokenIndex=0;
990         }
        /**
         * Returns true if the {@link #nextToken()} method
         * returns a valid token.
         */
hasMore()995         public boolean hasMore() {
996             return fCurrentTokenIndex<fTokenCount;
997         }
        /**
         * Obtains the token at the current position, then advances
         * the current position by one.
         *
         * If there's no such next token, this method throws
         * <tt>new XPathException("c-general-xpath");</tt>.
         */
nextToken()1005         public int nextToken() throws XPathException {
1006             if( fCurrentTokenIndex==fTokenCount )
1007                 throw new XPathException("c-general-xpath");
1008             return fTokens[fCurrentTokenIndex++];
1009         }
1010         /**
1011          * Obtains the token at the current position, without advancing
1012          * the current position.
1013          *
1014          * If there's no such next token, this method throws
1015          * <tt>new XPathException("c-general-xpath");</tt>.
1016          */
peekToken()1017         public int peekToken() throws XPathException {
1018             if( fCurrentTokenIndex==fTokenCount )
1019                 throw new XPathException("c-general-xpath");
1020             return fTokens[fCurrentTokenIndex];
1021         }
        /**
         * Obtains the token at the current position as a String, then
         * advances the current position by one.
         *
         * If there's no current token or if the current token
         * is not a string token, this method throws
         * <tt>new XPathException("c-general-xpath");</tt>.
         */
nextTokenAsString()1029         public String nextTokenAsString() throws XPathException {
1030             String s = getTokenString(nextToken());
1031             if(s==null)     throw new XPathException("c-general-xpath");
1032             return s;
1033         }
1034 
dumpTokens()1035         public void dumpTokens() {
1036             //if (DUMP_TOKENS) {
1037                 for (int i = 0; i < fTokenCount; i++) {
1038                     switch (fTokens[i]) {
1039                     case EXPRTOKEN_OPEN_PAREN:
1040                         System.out.print("<OPEN_PAREN/>");
1041                         break;
1042                     case EXPRTOKEN_CLOSE_PAREN:
1043                         System.out.print("<CLOSE_PAREN/>");
1044                         break;
1045                     case EXPRTOKEN_OPEN_BRACKET:
1046                         System.out.print("<OPEN_BRACKET/>");
1047                         break;
1048                     case EXPRTOKEN_CLOSE_BRACKET:
1049                         System.out.print("<CLOSE_BRACKET/>");
1050                         break;
1051                     case EXPRTOKEN_PERIOD:
1052                         System.out.print("<PERIOD/>");
1053                         break;
1054                     case EXPRTOKEN_DOUBLE_PERIOD:
1055                         System.out.print("<DOUBLE_PERIOD/>");
1056                         break;
1057                     case EXPRTOKEN_ATSIGN:
1058                         System.out.print("<ATSIGN/>");
1059                         break;
1060                     case EXPRTOKEN_COMMA:
1061                         System.out.print("<COMMA/>");
1062                         break;
1063                     case EXPRTOKEN_DOUBLE_COLON:
1064                         System.out.print("<DOUBLE_COLON/>");
1065                         break;
1066                     case EXPRTOKEN_NAMETEST_ANY:
1067                         System.out.print("<NAMETEST_ANY/>");
1068                         break;
1069                     case EXPRTOKEN_NAMETEST_NAMESPACE:
1070                         System.out.print("<NAMETEST_NAMESPACE");
1071                         System.out.print(" prefix=\"" + getTokenString(fTokens[++i]) + "\"");
1072                         System.out.print("/>");
1073                         break;
1074                     case EXPRTOKEN_NAMETEST_QNAME:
1075                         System.out.print("<NAMETEST_QNAME");
1076                         if (fTokens[++i] != -1)
1077                             System.out.print(" prefix=\"" + getTokenString(fTokens[i]) + "\"");
1078                         System.out.print(" localpart=\"" + getTokenString(fTokens[++i]) + "\"");
1079                         System.out.print("/>");
1080                         break;
1081                     case EXPRTOKEN_NODETYPE_COMMENT:
1082                         System.out.print("<NODETYPE_COMMENT/>");
1083                         break;
1084                     case EXPRTOKEN_NODETYPE_TEXT:
1085                         System.out.print("<NODETYPE_TEXT/>");
1086                         break;
1087                     case EXPRTOKEN_NODETYPE_PI:
1088                         System.out.print("<NODETYPE_PI/>");
1089                         break;
1090                     case EXPRTOKEN_NODETYPE_NODE:
1091                         System.out.print("<NODETYPE_NODE/>");
1092                         break;
1093                     case EXPRTOKEN_OPERATOR_AND:
1094                         System.out.print("<OPERATOR_AND/>");
1095                         break;
1096                     case EXPRTOKEN_OPERATOR_OR:
1097                         System.out.print("<OPERATOR_OR/>");
1098                         break;
1099                     case EXPRTOKEN_OPERATOR_MOD:
1100                         System.out.print("<OPERATOR_MOD/>");
1101                         break;
1102                     case EXPRTOKEN_OPERATOR_DIV:
1103                         System.out.print("<OPERATOR_DIV/>");
1104                         break;
1105                     case EXPRTOKEN_OPERATOR_MULT:
1106                         System.out.print("<OPERATOR_MULT/>");
1107                         break;
1108                     case EXPRTOKEN_OPERATOR_SLASH:
1109                         System.out.print("<OPERATOR_SLASH/>");
1110                         if (i + 1 < fTokenCount) {
1111                             System.out.println();
1112                             System.out.print("  ");
1113                         }
1114                         break;
1115                     case EXPRTOKEN_OPERATOR_DOUBLE_SLASH:
1116                         System.out.print("<OPERATOR_DOUBLE_SLASH/>");
1117                         break;
1118                     case EXPRTOKEN_OPERATOR_UNION:
1119                         System.out.print("<OPERATOR_UNION/>");
1120                         break;
1121                     case EXPRTOKEN_OPERATOR_PLUS:
1122                         System.out.print("<OPERATOR_PLUS/>");
1123                         break;
1124                     case EXPRTOKEN_OPERATOR_MINUS:
1125                         System.out.print("<OPERATOR_MINUS/>");
1126                         break;
1127                     case EXPRTOKEN_OPERATOR_EQUAL:
1128                         System.out.print("<OPERATOR_EQUAL/>");
1129                         break;
1130                     case EXPRTOKEN_OPERATOR_NOT_EQUAL:
1131                         System.out.print("<OPERATOR_NOT_EQUAL/>");
1132                         break;
1133                     case EXPRTOKEN_OPERATOR_LESS:
1134                         System.out.print("<OPERATOR_LESS/>");
1135                         break;
1136                     case EXPRTOKEN_OPERATOR_LESS_EQUAL:
1137                         System.out.print("<OPERATOR_LESS_EQUAL/>");
1138                         break;
1139                     case EXPRTOKEN_OPERATOR_GREATER:
1140                         System.out.print("<OPERATOR_GREATER/>");
1141                         break;
1142                     case EXPRTOKEN_OPERATOR_GREATER_EQUAL:
1143                         System.out.print("<OPERATOR_GREATER_EQUAL/>");
1144                         break;
1145                     case EXPRTOKEN_FUNCTION_NAME:
1146                         System.out.print("<FUNCTION_NAME");
1147                         if (fTokens[++i] != -1)
1148                             System.out.print(" prefix=\"" + getTokenString(fTokens[i]) + "\"");
1149                         System.out.print(" localpart=\"" + getTokenString(fTokens[++i]) + "\"");
1150                         System.out.print("/>");
1151                         break;
1152                     case EXPRTOKEN_AXISNAME_ANCESTOR:
1153                         System.out.print("<AXISNAME_ANCESTOR/>");
1154                         break;
1155                     case EXPRTOKEN_AXISNAME_ANCESTOR_OR_SELF:
1156                         System.out.print("<AXISNAME_ANCESTOR_OR_SELF/>");
1157                         break;
1158                     case EXPRTOKEN_AXISNAME_ATTRIBUTE:
1159                         System.out.print("<AXISNAME_ATTRIBUTE/>");
1160                         break;
1161                     case EXPRTOKEN_AXISNAME_CHILD:
1162                         System.out.print("<AXISNAME_CHILD/>");
1163                         break;
1164                     case EXPRTOKEN_AXISNAME_DESCENDANT:
1165                         System.out.print("<AXISNAME_DESCENDANT/>");
1166                         break;
1167                     case EXPRTOKEN_AXISNAME_DESCENDANT_OR_SELF:
1168                         System.out.print("<AXISNAME_DESCENDANT_OR_SELF/>");
1169                         break;
1170                     case EXPRTOKEN_AXISNAME_FOLLOWING:
1171                         System.out.print("<AXISNAME_FOLLOWING/>");
1172                         break;
1173                     case EXPRTOKEN_AXISNAME_FOLLOWING_SIBLING:
1174                         System.out.print("<AXISNAME_FOLLOWING_SIBLING/>");
1175                         break;
1176                     case EXPRTOKEN_AXISNAME_NAMESPACE:
1177                         System.out.print("<AXISNAME_NAMESPACE/>");
1178                         break;
1179                     case EXPRTOKEN_AXISNAME_PARENT:
1180                         System.out.print("<AXISNAME_PARENT/>");
1181                         break;
1182                     case EXPRTOKEN_AXISNAME_PRECEDING:
1183                         System.out.print("<AXISNAME_PRECEDING/>");
1184                         break;
1185                     case EXPRTOKEN_AXISNAME_PRECEDING_SIBLING:
1186                         System.out.print("<AXISNAME_PRECEDING_SIBLING/>");
1187                         break;
1188                     case EXPRTOKEN_AXISNAME_SELF:
1189                         System.out.print("<AXISNAME_SELF/>");
1190                         break;
1191                     case EXPRTOKEN_LITERAL:
1192                         System.out.print("<LITERAL");
1193                         System.out.print(" value=\"" + getTokenString(fTokens[++i]) + "\"");
1194                         System.out.print("/>");
1195                         break;
1196                     case EXPRTOKEN_NUMBER:
1197                         System.out.print("<NUMBER");
1198                         System.out.print(" whole=\"" + getTokenString(fTokens[++i]) + "\"");
1199                         System.out.print(" part=\"" + getTokenString(fTokens[++i]) + "\"");
1200                         System.out.print("/>");
1201                         break;
1202                     case EXPRTOKEN_VARIABLE_REFERENCE:
1203                         System.out.print("<VARIABLE_REFERENCE");
1204                         if (fTokens[++i] != -1)
1205                             System.out.print(" prefix=\"" + getTokenString(fTokens[i]) + "\"");
1206                         System.out.print(" localpart=\"" + getTokenString(fTokens[++i]) + "\"");
1207                         System.out.print("/>");
1208                         break;
1209                     default:
1210                         System.out.println("<???/>");
1211                     }
1212                 }
1213                 System.out.println();
1214             //}
1215         }
1216 
1217     } // class Tokens
1218 
1219     /**
1220      * @xerces.internal
1221      *
1222      * @author Glenn Marcy, IBM
1223      * @author Andy Clark, IBM
1224      *
1225      */
1226     private static class Scanner {
1227 
        /**
         * Character classes assigned to the characters of an expression.
         *
         * The table below lays out the 7-bit ASCII subset: the column is the
         * low hex digit of the character code and the row (noted at the end
         * of each line) is the high hex digit.
         *
         *  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
         *  0,  0,  0,  0,  0,  0,  0,  0,  0, HT, LF,  0,  0, CR,  0,  0,  // 0
         *  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 1
         * SP,  !,  ",  #,  $,  %,  &,  ',  (,  ),  *,  +,  ,,  -,  .,  /,  // 2
         *  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  :,  ;,  <,  =,  >,  ?,  // 3
         *  @,  A,  B,  C,  D,  E,  F,  G,  H,  I,  J,  K,  L,  M,  N,  O,  // 4
         *  P,  Q,  R,  S,  T,  U,  V,  W,  X,  Y,  Z,  [,  \,  ],  ^,  _,  // 5
         *  `,  a,  b,  c,  d,  e,  f,  g,  h,  i,  j,  k,  l,  m,  n,  o,  // 6
         *  p,  q,  r,  s,  t,  u,  v,  w,  x,  y,  z,  {,  |,  },  ~, DEL  // 7
         */
        private static final byte
            CHARTYPE_INVALID            =  0,   // invalid XML character
            CHARTYPE_OTHER              =  1,   // not special - one of "#%&;?\^`{}~" or DEL
            CHARTYPE_WHITESPACE         =  2,   // one of "\t\n\r " (0x09, 0x0A, 0x0D, 0x20)
            CHARTYPE_EXCLAMATION        =  3,   // '!' (0x21)
            CHARTYPE_QUOTE              =  4,   // '\"' or '\'' (0x22 and 0x27)
            CHARTYPE_DOLLAR             =  5,   // '$' (0x24)
            CHARTYPE_OPEN_PAREN         =  6,   // '(' (0x28)
            CHARTYPE_CLOSE_PAREN        =  7,   // ')' (0x29)
            CHARTYPE_STAR               =  8,   // '*' (0x2A)
            CHARTYPE_PLUS               =  9,   // '+' (0x2B)
            CHARTYPE_COMMA              = 10,   // ',' (0x2C)
            CHARTYPE_MINUS              = 11,   // '-' (0x2D)
            CHARTYPE_PERIOD             = 12,   // '.' (0x2E)
            CHARTYPE_SLASH              = 13,   // '/' (0x2F)
            CHARTYPE_DIGIT              = 14,   // '0'-'9' (0x30 to 0x39)
            CHARTYPE_COLON              = 15,   // ':' (0x3A)
            CHARTYPE_LESS               = 16,   // '<' (0x3C)
            CHARTYPE_EQUAL              = 17,   // '=' (0x3D)
            CHARTYPE_GREATER            = 18,   // '>' (0x3E)
            CHARTYPE_ATSIGN             = 19,   // '@' (0x40)
            CHARTYPE_LETTER             = 20,   // 'A'-'Z' or 'a'-'z' (0x41 to 0x5A and 0x61 to 0x7A)
            CHARTYPE_OPEN_BRACKET       = 21,   // '[' (0x5B)
            CHARTYPE_CLOSE_BRACKET      = 22,   // ']' (0x5D)
            CHARTYPE_UNDERSCORE         = 23,   // '_' (0x5F)
            CHARTYPE_UNION              = 24,   // '|' (0x7C)
            CHARTYPE_NONASCII           = 25;   // Non-ASCII Unicode codepoint (>= 0x80)
1268 
        /**
         * Character class of each 7-bit ASCII character, indexed by character
         * code (0x00 to 0x7F); each row holds sixteen consecutive codes and
         * each entry is one of the CHARTYPE_* values.  scanExpr only consults
         * this table for characters below 0x80; anything at or above 0x80 is
         * classified as CHARTYPE_NONASCII without a lookup.
         */
        private static final byte[] fASCIICharMap = {
            0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  2,  0,  0,  2,  0,  0,
            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
            2,  3,  4,  1,  5,  1,  1,  4,  6,  7,  8,  9, 10, 11, 12, 13,
           14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15,  1, 16, 17, 18,  1,
           19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
           20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21,  1, 22,  1, 23,
            1, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
           20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,  1, 24,  1,  1,  1
        };
1279 
        /**
         * Symbol literals: the keyword strings declared below (operator
         * names, node-type names and axis names).
         */

        //
        // Data
        //

        /** Symbol table handed to the constructor. */
        private SymbolTable fSymbolTable;

        // symbols
        // NOTE(review): string literals are already interned by the JVM, so
        // the explicit intern() calls look redundant -- confirm before removing.
        // Presumably these are compared against symbols produced while
        // scanning names; verify in scanExpr.

        // Operator names.
        private static final String fAndSymbol = "and".intern();
        private static final String fOrSymbol = "or".intern();
        private static final String fModSymbol = "mod".intern();
        private static final String fDivSymbol = "div".intern();

        // Node-type names.
        private static final String fCommentSymbol = "comment".intern();
        private static final String fTextSymbol = "text".intern();
        private static final String fPISymbol = "processing-instruction".intern();
        private static final String fNodeSymbol = "node".intern();

        // Axis names.
        private static final String fAncestorSymbol = "ancestor".intern();
        private static final String fAncestorOrSelfSymbol = "ancestor-or-self".intern();
        private static final String fAttributeSymbol = "attribute".intern();
        private static final String fChildSymbol = "child".intern();
        private static final String fDescendantSymbol = "descendant".intern();
        private static final String fDescendantOrSelfSymbol = "descendant-or-self".intern();
        private static final String fFollowingSymbol = "following".intern();
        private static final String fFollowingSiblingSymbol = "following-sibling".intern();
        private static final String fNamespaceSymbol = "namespace".intern();
        private static final String fParentSymbol = "parent".intern();
        private static final String fPrecedingSymbol = "preceding".intern();
        private static final String fPrecedingSiblingSymbol = "preceding-sibling".intern();
        private static final String fSelfSymbol = "self".intern();
1316 
1317         //
1318         // Constructors
1319         //
1320 
1321         /** Constructs an XPath expression scanner. */
Scanner(SymbolTable symbolTable)1322         public Scanner(SymbolTable symbolTable) {
1323 
1324             // save pool and tokens
1325             fSymbolTable = symbolTable;
1326 
1327         } // <init>(SymbolTable)
1328 
1329         /**
1330          *
1331          */
scanExpr(SymbolTable symbolTable, XPath.Tokens tokens, String data, int currentOffset, int endOffset)1332         public boolean scanExpr(SymbolTable symbolTable,
1333                                 XPath.Tokens tokens, String data,
1334                                 int currentOffset, int endOffset)
1335             throws XPathException {
1336 
1337             int nameOffset;
1338             String nameHandle, prefixHandle;
1339             boolean starIsMultiplyOperator = false;
1340             int ch;
1341 
1342             while (true) {
1343                 if (currentOffset == endOffset) {
1344                     break;
1345                 }
1346                 ch = data.charAt(currentOffset);
1347                 //
1348                 // [39] ExprWhitespace ::= S
1349                 //
1350                 while (ch == ' ' || ch == 0x0A || ch == 0x09 || ch == 0x0D) {
1351                     if (++currentOffset == endOffset) {
1352                         break;
1353                     }
1354                     ch = data.charAt(currentOffset);
1355                 }
1356                 if (currentOffset == endOffset) {
1357                     break;
1358                 }
1359                 //
1360                 // [28] ExprToken ::= '(' | ')' | '[' | ']' | '.' | '..' | '@' | ',' | '::'
1361                 //                  | NameTest | NodeType | Operator | FunctionName
1362                 //                  | AxisName | Literal | Number | VariableReference
1363                 //
1364                 byte chartype = (ch >= 0x80) ? CHARTYPE_NONASCII : fASCIICharMap[ch];
1365                 switch (chartype) {
1366                 case CHARTYPE_OPEN_PAREN:       // '('
1367                     addToken(tokens, XPath.Tokens.EXPRTOKEN_OPEN_PAREN);
1368                     starIsMultiplyOperator = false;
1369                     if (++currentOffset == endOffset) {
1370                         break;
1371                     }
1372                     break;
1373                 case CHARTYPE_CLOSE_PAREN:      // ')'
1374                     addToken(tokens, XPath.Tokens.EXPRTOKEN_CLOSE_PAREN);
1375                     starIsMultiplyOperator = true;
1376                     if (++currentOffset == endOffset) {
1377                         break;
1378                     }
1379                     break;
1380                 case CHARTYPE_OPEN_BRACKET:     // '['
1381                     addToken(tokens, XPath.Tokens.EXPRTOKEN_OPEN_BRACKET);
1382                     starIsMultiplyOperator = false;
1383                     if (++currentOffset == endOffset) {
1384                         break;
1385                     }
1386                     break;
1387                 case CHARTYPE_CLOSE_BRACKET:    // ']'
1388                     addToken(tokens, XPath.Tokens.EXPRTOKEN_CLOSE_BRACKET);
1389                     starIsMultiplyOperator = true;
1390                     if (++currentOffset == endOffset) {
1391                         break;
1392                     }
1393                     break;
1394                 //
1395                 // [30] Number ::= Digits ('.' Digits?)? | '.' Digits
1396                 //                                         ^^^^^^^^^^
1397                 //
1398                 case CHARTYPE_PERIOD:           // '.', '..' or '.' Digits
1399                     if (currentOffset + 1 == endOffset) {
1400                         addToken(tokens, XPath.Tokens.EXPRTOKEN_PERIOD);
1401                         starIsMultiplyOperator = true;
1402                         currentOffset++;
1403                         break;
1404                     }
1405                     ch = data.charAt(currentOffset + 1);
1406                     if (ch == '.') {            // '..'
1407                         addToken(tokens, XPath.Tokens.EXPRTOKEN_DOUBLE_PERIOD);
1408                         starIsMultiplyOperator = true;
1409                         currentOffset += 2;
1410                     } else if (ch >= '0' && ch <= '9') {
1411                         addToken(tokens, XPath.Tokens.EXPRTOKEN_NUMBER);
1412                         starIsMultiplyOperator = true;
1413                         currentOffset = scanNumber(tokens, data, endOffset, currentOffset/*, encoding*/);
1414                     } else if (ch == '/') {
1415                         addToken(tokens, XPath.Tokens.EXPRTOKEN_PERIOD);
1416                         starIsMultiplyOperator = true;
1417                         currentOffset++;
1418                     } else if (ch == '|') {
1419                         addToken(tokens, XPath.Tokens.EXPRTOKEN_PERIOD);
1420                         starIsMultiplyOperator = true;
1421                         currentOffset++;
1422                         break;
1423                     } else if (ch == ' ' || ch == 0x0A || ch == 0x09 || ch == 0x0D) {
1424                         // this is legal if the next token is non-existent or |
1425                         do {
1426                             if (++currentOffset == endOffset) {
1427                                 break;
1428                             }
1429                             ch = data.charAt(currentOffset);
1430                         } while (ch == ' ' || ch == 0x0A || ch == 0x09 || ch == 0x0D);
1431                         if (currentOffset == endOffset || ch == '|' || ch == '/') {
1432                             addToken(tokens, XPath.Tokens.EXPRTOKEN_PERIOD);
1433                             starIsMultiplyOperator = true;
1434                             break;
1435                         }
1436                         throw new XPathException ("c-general-xpath");
1437                     } else {                    // '.'
1438                         throw new XPathException ("c-general-xpath");
1439                     }
1440                     if (currentOffset == endOffset) {
1441                         break;
1442                     }
1443                     break;
1444                 case CHARTYPE_ATSIGN:           // '@'
1445                     addToken(tokens, XPath.Tokens.EXPRTOKEN_ATSIGN);
1446                     starIsMultiplyOperator = false;
1447                     if (++currentOffset == endOffset) {
1448                         break;
1449                     }
1450                     break;
1451                 case CHARTYPE_COMMA:            // ','
1452                     addToken(tokens, XPath.Tokens.EXPRTOKEN_COMMA);
1453                     starIsMultiplyOperator = false;
1454                     if (++currentOffset == endOffset) {
1455                         break;
1456                     }
1457                     break;
1458                 case CHARTYPE_COLON:            // '::'
1459                     if (++currentOffset == endOffset) {
1460                 // System.out.println("abort 1a");
1461                         return false; // REVISIT
1462                     }
1463                     ch = data.charAt(currentOffset);
1464                     if (ch != ':') {
1465                 // System.out.println("abort 1b");
1466                         return false; // REVISIT
1467                     }
1468                     addToken(tokens, XPath.Tokens.EXPRTOKEN_DOUBLE_COLON);
1469                     starIsMultiplyOperator = false;
1470                     if (++currentOffset == endOffset) {
1471                         break;
1472                     }
1473                     break;
1474                 case CHARTYPE_SLASH:            // '/' and '//'
1475                     if (++currentOffset == endOffset) {
1476                         addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_SLASH);
1477                         starIsMultiplyOperator = false;
1478                         break;
1479                     }
1480                     ch = data.charAt(currentOffset);
1481                     if (ch == '/') { // '//'
1482                         addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_DOUBLE_SLASH);
1483                         starIsMultiplyOperator = false;
1484                         if (++currentOffset == endOffset) {
1485                             break;
1486                         }
1487                     } else {
1488                         addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_SLASH);
1489                         starIsMultiplyOperator = false;
1490                     }
1491                     break;
1492                 case CHARTYPE_UNION:            // '|'
1493                     addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_UNION);
1494                     starIsMultiplyOperator = false;
1495                     if (++currentOffset == endOffset) {
1496                         break;
1497                     }
1498                     break;
1499                 case CHARTYPE_PLUS:             // '+'
1500                     addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_PLUS);
1501                     starIsMultiplyOperator = false;
1502                     if (++currentOffset == endOffset) {
1503                         break;
1504                     }
1505                     break;
1506                 case CHARTYPE_MINUS:            // '-'
1507                     addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_MINUS);
1508                     starIsMultiplyOperator = false;
1509                     if (++currentOffset == endOffset) {
1510                         break;
1511                     }
1512                     break;
1513                 case CHARTYPE_EQUAL:            // '='
1514                     addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_EQUAL);
1515                     starIsMultiplyOperator = false;
1516                     if (++currentOffset == endOffset) {
1517                         break;
1518                     }
1519                     break;
1520                 case CHARTYPE_EXCLAMATION:      // '!='
1521                     if (++currentOffset == endOffset) {
1522                 // System.out.println("abort 2a");
1523                         return false; // REVISIT
1524                     }
1525                     ch = data.charAt(currentOffset);
1526                     if (ch != '=') {
1527                 // System.out.println("abort 2b");
1528                         return false; // REVISIT
1529                     }
1530                     addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_NOT_EQUAL);
1531                     starIsMultiplyOperator = false;
1532                     if (++currentOffset == endOffset) {
1533                         break;
1534                     }
1535                     break;
1536                 case CHARTYPE_LESS: // '<' and '<='
1537                     if (++currentOffset == endOffset) {
1538                         addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_LESS);
1539                         starIsMultiplyOperator = false;
1540                         break;
1541                     }
1542                     ch = data.charAt(currentOffset);
1543                     if (ch == '=') { // '<='
1544                         addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_LESS_EQUAL);
1545                         starIsMultiplyOperator = false;
1546                         if (++currentOffset == endOffset) {
1547                             break;
1548                         }
1549                     } else {
1550                         addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_LESS);
1551                         starIsMultiplyOperator = false;
1552                     }
1553                     break;
1554                 case CHARTYPE_GREATER: // '>' and '>='
1555                     if (++currentOffset == endOffset) {
1556                         addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_GREATER);
1557                         starIsMultiplyOperator = false;
1558                         break;
1559                     }
1560                     ch = data.charAt(currentOffset);
1561                     if (ch == '=') { // '>='
1562                         addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_GREATER_EQUAL);
1563                         starIsMultiplyOperator = false;
1564                         if (++currentOffset == endOffset) {
1565                             break;
1566                         }
1567                     } else {
1568                         addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_GREATER);
1569                         starIsMultiplyOperator = false;
1570                     }
1571                     break;
1572                 //
1573                 // [29] Literal ::= '"' [^"]* '"' | "'" [^']* "'"
1574                 //
1575                 case CHARTYPE_QUOTE:            // '\"' or '\''
1576                     int qchar = ch;
1577                     if (++currentOffset == endOffset) {
1578                 // System.out.println("abort 2c");
1579                         return false; // REVISIT
1580                     }
1581                     ch = data.charAt(currentOffset);
1582                     int litOffset = currentOffset;
1583                     while (ch != qchar) {
1584                         if (++currentOffset == endOffset) {
1585                 // System.out.println("abort 2d");
1586                             return false; // REVISIT
1587                         }
1588                         ch = data.charAt(currentOffset);
1589                     }
1590                     int litLength = currentOffset - litOffset;
1591                     addToken(tokens, XPath.Tokens.EXPRTOKEN_LITERAL);
1592                     starIsMultiplyOperator = true;
1593                     tokens.addToken(symbolTable.addSymbol(data.substring(litOffset, litOffset + litLength)));
1594                     if (++currentOffset == endOffset) {
1595                         break;
1596                     }
1597                     break;
1598                 //
1599                 // [30] Number ::= Digits ('.' Digits?)? | '.' Digits
1600                 // [31] Digits ::= [0-9]+
1601                 //
1602                 case CHARTYPE_DIGIT:
1603                     addToken(tokens, XPath.Tokens.EXPRTOKEN_NUMBER);
1604                     starIsMultiplyOperator = true;
1605                     currentOffset = scanNumber(tokens, data, endOffset, currentOffset/*, encoding*/);
1606                     break;
1607                 //
1608                 // [36] VariableReference ::= '$' QName
1609                 //
1610                 case CHARTYPE_DOLLAR:
1611                     if (++currentOffset == endOffset) {
1612                 // System.out.println("abort 3a");
1613                         return false; // REVISIT
1614                     }
1615                     nameOffset = currentOffset;
1616                     currentOffset = scanNCName(data, endOffset, currentOffset);
1617                     if (currentOffset == nameOffset) {
1618                 // System.out.println("abort 3b");
1619                         return false; // REVISIT
1620                     }
1621                     if (currentOffset < endOffset) {
1622                         ch = data.charAt(currentOffset);
1623                     }
1624                     else {
1625                         ch = -1;
1626                     }
1627                     nameHandle = symbolTable.addSymbol(data.substring(nameOffset, currentOffset));
1628                     if (ch != ':') {
1629                         prefixHandle = XMLSymbols.EMPTY_STRING;
1630                     } else {
1631                         prefixHandle = nameHandle;
1632                         if (++currentOffset == endOffset) {
1633                 // System.out.println("abort 4a");
1634                             return false; // REVISIT
1635                         }
1636                         nameOffset = currentOffset;
1637                         currentOffset = scanNCName(data, endOffset, currentOffset);
1638                         if (currentOffset == nameOffset) {
1639                 // System.out.println("abort 4b");
1640                             return false; // REVISIT
1641                         }
1642                         if (currentOffset < endOffset) {
1643                             ch = data.charAt(currentOffset);
1644                         }
1645                         else {
1646                             ch = -1;
1647                         }
1648                         nameHandle = symbolTable.addSymbol(data.substring(nameOffset, currentOffset));
1649                     }
1650                     addToken(tokens, XPath.Tokens.EXPRTOKEN_VARIABLE_REFERENCE);
1651                     starIsMultiplyOperator = true;
1652                     tokens.addToken(prefixHandle);
1653                     tokens.addToken(nameHandle);
1654                     break;
1655                 //
1656                 // [37] NameTest ::= '*' | NCName ':' '*' | QName
1657                 // [34] MultiplyOperator ::= '*'
1658                 //
1659                 case CHARTYPE_STAR:             // '*'
1660                     //
1661                     // 3.7 Lexical Structure
1662                     //
1663                     //  If there is a preceding token and the preceding token is not one of @, ::, (, [, , or
1664                     //  an Operator, then a * must be recognized as a MultiplyOperator.
1665                     //
1666                     // Otherwise, the token must not be recognized as a MultiplyOperator.
1667                     //
1668                     if (starIsMultiplyOperator) {
1669                         addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_MULT);
1670                         starIsMultiplyOperator = false;
1671                     } else {
1672                         addToken(tokens, XPath.Tokens.EXPRTOKEN_NAMETEST_ANY);
1673                         starIsMultiplyOperator = true;
1674                     }
1675                     if (++currentOffset == endOffset) {
1676                         break;
1677                     }
1678                     break;
1679                 //
1680                 // NCName, QName and non-terminals
1681                 //
1682                 case CHARTYPE_NONASCII: // possibly a valid non-ascii 'Letter' (BaseChar | Ideographic)
1683                 case CHARTYPE_LETTER:
1684                 case CHARTYPE_UNDERSCORE:
1685                     //
1686                     // 3.7 Lexical Structure
1687                     //
1688                     //  If there is a preceding token and the preceding token is not one of @, ::, (, [, , or
1689                     //  an Operator, then an NCName must be recognized as an OperatorName.
1690                     //
1691                     //  If the character following an NCName (possibly after intervening ExprWhitespace) is (,
1692                     //  then the token must be recognized as a NodeType or a FunctionName.
1693                     //
1694                     //  If the two characters following an NCName (possibly after intervening ExprWhitespace)
1695                     //  are ::, then the token must be recognized as an AxisName.
1696                     //
1697                     //  Otherwise, the token must not be recognized as an OperatorName, a NodeType, a
1698                     //  FunctionName, or an AxisName.
1699                     //
1700                     // [33] OperatorName ::= 'and' | 'or' | 'mod' | 'div'
1701                     // [38] NodeType ::= 'comment' | 'text' | 'processing-instruction' | 'node'
1702                     // [35] FunctionName ::= QName - NodeType
1703                     // [6] AxisName ::= (see above)
1704                     //
1705                     // [37] NameTest ::= '*' | NCName ':' '*' | QName
1706                     // [5] NCName ::= (Letter | '_') (NCNameChar)*
1707                     // [?] NCNameChar ::= Letter | Digit | '.' | '-' | '_'  (ascii subset of 'NCNameChar')
1708                     // [?] QName ::= (NCName ':')? NCName
1709                     // [?] Letter ::= [A-Za-z]                              (ascii subset of 'Letter')
1710                     // [?] Digit ::= [0-9]                                  (ascii subset of 'Digit')
1711                     //
1712                     nameOffset = currentOffset;
1713                     currentOffset = scanNCName(data, endOffset, currentOffset);
1714                     if (currentOffset == nameOffset) {
1715                 // System.out.println("abort 4c");
1716                         return false; // REVISIT
1717                     }
1718                     if (currentOffset < endOffset) {
1719                         ch = data.charAt(currentOffset);
1720                     }
1721                     else {
1722                         ch = -1;
1723                     }
1724                     nameHandle = symbolTable.addSymbol(data.substring(nameOffset, currentOffset));
1725                     boolean isNameTestNCName = false;
1726                     boolean isAxisName = false;
1727                     prefixHandle = XMLSymbols.EMPTY_STRING;
1728                     if (ch == ':') {
1729                         if (++currentOffset == endOffset) {
1730                 // System.out.println("abort 5");
1731                             return false; // REVISIT
1732                         }
1733                         ch = data.charAt(currentOffset);
1734                         if (ch == '*') {
1735                             if (++currentOffset < endOffset) {
1736                                 ch = data.charAt(currentOffset);
1737                             }
1738                             isNameTestNCName = true;
1739                         } else if (ch == ':') {
1740                             if (++currentOffset < endOffset) {
1741                                 ch = data.charAt(currentOffset);
1742                             }
1743                             isAxisName = true;
1744                         } else {
1745                             prefixHandle = nameHandle;
1746                             nameOffset = currentOffset;
1747                             currentOffset = scanNCName(data, endOffset, currentOffset);
1748                             if (currentOffset == nameOffset) {
1749                 // System.out.println("abort 5b");
1750                                 return false; // REVISIT
1751                             }
1752                             if (currentOffset < endOffset) {
1753                                 ch = data.charAt(currentOffset);
1754                             }
1755                             else {
1756                                 ch = -1;
1757                             }
1758                             nameHandle = symbolTable.addSymbol(data.substring(nameOffset, currentOffset));
1759                         }
1760                     }
1761                     //
1762                     // [39] ExprWhitespace ::= S
1763                     //
1764                     while (ch == ' ' || ch == 0x0A || ch == 0x09 || ch == 0x0D) {
1765                         if (++currentOffset == endOffset) {
1766                             break;
1767                         }
1768                         ch = data.charAt(currentOffset);
1769                     }
1770                     //
1771                     //  If there is a preceding token and the preceding token is not one of @, ::, (, [, , or
1772                     //  an Operator, then an NCName must be recognized as an OperatorName.
1773                     //
1774                     if (starIsMultiplyOperator) {
1775                         if (nameHandle == fAndSymbol) {
1776                             addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_AND);
1777                             starIsMultiplyOperator = false;
1778                         } else if (nameHandle == fOrSymbol) {
1779                             addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_OR);
1780                             starIsMultiplyOperator = false;
1781                         } else if (nameHandle == fModSymbol) {
1782                             addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_MOD);
1783                             starIsMultiplyOperator = false;
1784                         } else if (nameHandle == fDivSymbol) {
1785                             addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_DIV);
1786                             starIsMultiplyOperator = false;
1787                         } else {
1788                 // System.out.println("abort 6");
1789                             return false; // REVISIT
1790                         }
1791                         if (isNameTestNCName) {
1792                 // System.out.println("abort 7");
1793                             return false; // REVISIT - NCName:* where an OperatorName is required
1794                         } else if (isAxisName) {
1795                 // System.out.println("abort 8");
1796                             return false; // REVISIT - AxisName:: where an OperatorName is required
1797                         }
1798                         break;
1799                     }
1800                     //
1801                     //  If the character following an NCName (possibly after intervening ExprWhitespace) is (,
1802                     //  then the token must be recognized as a NodeType or a FunctionName.
1803                     //
1804                     if (ch == '(' && !isNameTestNCName && !isAxisName) {
1805                         if (nameHandle == fCommentSymbol) {
1806                             addToken(tokens, XPath.Tokens.EXPRTOKEN_NODETYPE_COMMENT);
1807                         } else if (nameHandle == fTextSymbol) {
1808                             addToken(tokens, XPath.Tokens.EXPRTOKEN_NODETYPE_TEXT);
1809                         } else if (nameHandle == fPISymbol) {
1810                             addToken(tokens, XPath.Tokens.EXPRTOKEN_NODETYPE_PI);
1811                         } else if (nameHandle == fNodeSymbol) {
1812                             addToken(tokens, XPath.Tokens.EXPRTOKEN_NODETYPE_NODE);
1813                         } else {
1814                             addToken(tokens, XPath.Tokens.EXPRTOKEN_FUNCTION_NAME);
1815                             tokens.addToken(prefixHandle);
1816                             tokens.addToken(nameHandle);
1817                         }
1818                         addToken(tokens, XPath.Tokens.EXPRTOKEN_OPEN_PAREN);
1819                         starIsMultiplyOperator = false;
1820                         if (++currentOffset == endOffset) {
1821                             break;
1822                         }
1823                         break;
1824                     }
1825                     //
1826                     //  If the two characters following an NCName (possibly after intervening ExprWhitespace)
1827                     //  are ::, then the token must be recognized as an AxisName.
1828                     //
1829                     if (isAxisName ||
1830                         (ch == ':' && currentOffset + 1 < endOffset &&
1831                          data.charAt(currentOffset + 1) == ':')) {
1832                         if (nameHandle == fAncestorSymbol) {
1833                             addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_ANCESTOR);
1834                         } else if (nameHandle == fAncestorOrSelfSymbol) {
1835                             addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_ANCESTOR_OR_SELF);
1836                         } else if (nameHandle == fAttributeSymbol) {
1837                             addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_ATTRIBUTE);
1838                         } else if (nameHandle == fChildSymbol) {
1839                             addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_CHILD);
1840                         } else if (nameHandle == fDescendantSymbol) {
1841                             addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_DESCENDANT);
1842                         } else if (nameHandle == fDescendantOrSelfSymbol) {
1843                             addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_DESCENDANT_OR_SELF);
1844                         } else if (nameHandle == fFollowingSymbol) {
1845                             addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_FOLLOWING);
1846                         } else if (nameHandle == fFollowingSiblingSymbol) {
1847                             addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_FOLLOWING_SIBLING);
1848                         } else if (nameHandle == fNamespaceSymbol) {
1849                             addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_NAMESPACE);
1850                         } else if (nameHandle == fParentSymbol) {
1851                             addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_PARENT);
1852                         } else if (nameHandle == fPrecedingSymbol) {
1853                             addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_PRECEDING);
1854                         } else if (nameHandle == fPrecedingSiblingSymbol) {
1855                             addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_PRECEDING_SIBLING);
1856                         } else if (nameHandle == fSelfSymbol) {
1857                             addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_SELF);
1858                         } else {
1859                 // System.out.println("abort 9");
1860                             return false; // REVISIT
1861                         }
1862                         if (isNameTestNCName) {
1863                 // System.out.println("abort 10");
1864                             return false; // REVISIT - "NCName:* ::" where "AxisName ::" is required
1865                         }
1866                         addToken(tokens, XPath.Tokens.EXPRTOKEN_DOUBLE_COLON);
1867                         starIsMultiplyOperator = false;
1868                         if (!isAxisName) {
1869                             currentOffset++;
1870                             if (++currentOffset == endOffset) {
1871                                 break;
1872                             }
1873                         }
1874                         break;
1875                     }
1876                     //
1877                     //  Otherwise, the token must not be recognized as an OperatorName, a NodeType, a
1878                     //  FunctionName, or an AxisName.
1879                     //
1880                     if (isNameTestNCName) {
1881                         addToken(tokens, XPath.Tokens.EXPRTOKEN_NAMETEST_NAMESPACE);
1882                         starIsMultiplyOperator = true;
1883                         tokens.addToken(nameHandle);
1884                     } else {
1885                         addToken(tokens, XPath.Tokens.EXPRTOKEN_NAMETEST_QNAME);
1886                         starIsMultiplyOperator = true;
1887                         tokens.addToken(prefixHandle);
1888                         tokens.addToken(nameHandle);
1889                     }
1890                     break;
1891                 }
1892             }
1893             if (XPath.Tokens.DUMP_TOKENS) {
1894                 tokens.dumpTokens();
1895             }
1896             return true;
1897         }
1898         //
1899         // [5] NCName ::= (Letter | '_') (NCNameChar)*
1900         // [6] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | CombiningChar | Extender
1901         //
scanNCName(String data, int endOffset, int currentOffset)1902         int scanNCName(String data, int endOffset, int currentOffset) {
1903             int ch = data.charAt(currentOffset);
1904             if (ch >= 0x80) {
1905                 if (!XMLChar.isNameStart(ch))
1906                 /*** // REVISIT: Make sure this is a negation. ***
1907                 if ((XMLCharacterProperties.fgCharFlags[ch] &
1908                      XMLCharacterProperties.E_InitialNameCharFlag) == 0)
1909                 /***/
1910                 {
1911                     return currentOffset;
1912                 }
1913             }
1914             else {
1915                 byte chartype = fASCIICharMap[ch];
1916                 if (chartype != CHARTYPE_LETTER && chartype != CHARTYPE_UNDERSCORE) {
1917                     return currentOffset;
1918                 }
1919             }
1920             while (++currentOffset < endOffset) {
1921                 ch = data.charAt(currentOffset);
1922                 if (ch >= 0x80) {
1923                     if (!XMLChar.isName(ch))
1924                     /*** // REVISIT: Make sure this is a negation. ***
1925                     if ((XMLCharacterProperties.fgCharFlags[ch] &
1926                          XMLCharacterProperties.E_NameCharFlag) == 0)
1927                     /***/
1928                     {
1929                         break;
1930                     }
1931                 }
1932                 else {
1933                     byte chartype = fASCIICharMap[ch];
1934                     if (chartype != CHARTYPE_LETTER && chartype != CHARTYPE_DIGIT &&
1935                         chartype != CHARTYPE_PERIOD && chartype != CHARTYPE_MINUS &&
1936                         chartype != CHARTYPE_UNDERSCORE)
1937                     {
1938                         break;
1939                     }
1940                 }
1941             }
1942             return currentOffset;
1943         }
1944         //
1945         // [30] Number ::= Digits ('.' Digits?)? | '.' Digits
1946         // [31] Digits ::= [0-9]+
1947         //
scanNumber(XPath.Tokens tokens, String data, int endOffset, int currentOffset )1948         private int scanNumber(XPath.Tokens tokens, String/*byte[]*/ data, int endOffset, int currentOffset/*, EncodingSupport encoding*/) {
1949             int ch = data.charAt(currentOffset);
1950             int whole = 0;
1951             int part = 0;
1952             while (ch >= '0' && ch <= '9') {
1953                 whole = (whole * 10) + (ch - '0');
1954                 if (++currentOffset == endOffset) {
1955                     break;
1956                 }
1957                 ch = data.charAt(currentOffset);
1958             }
1959             if (ch == '.') {
1960                 if (++currentOffset < endOffset) {
1961                     /** int start = currentOffset; **/
1962                     ch = data.charAt(currentOffset);
1963                     while (ch >= '0' && ch <= '9') {
1964                         part = (part * 10) + (ch - '0');
1965                         if (++currentOffset == endOffset) {
1966                             break;
1967                         }
1968                         ch = data.charAt(currentOffset);
1969                     }
1970                     if (part != 0) {
1971                         /***
1972                         part = tokens.addSymbol(data, start, currentOffset - start, encoding);
1973                         /***/
1974                         throw new RuntimeException("find a solution!");
1975                         //part = fStringPool.addSymbol(data.substring(start, currentOffset));
1976                         /***/
1977                     }
1978                 }
1979             }
1980             tokens.addToken(whole);
1981             tokens.addToken(part);
1982             return currentOffset;
1983         }
1984 
1985         //
1986         // Protected methods
1987         //
1988 
        /**
         * This method adds the specified token to the token list. By
         * default, this method allows all tokens. However, subclasses
         * of the Scanner can override this method in order
         * to disallow certain tokens from being used in the scanned
         * XPath expression. This is a convenient way of allowing only
         * a subset of XPath.
         */
addToken(XPath.Tokens tokens, int token)1997         protected void addToken(XPath.Tokens tokens, int token)
1998             throws XPathException {
1999             tokens.addToken(token);
2000         } // addToken(int)
2001 
2002     } // class Scanner
2003 
2004     //
2005     // MAIN
2006     //
2007 
2008     /** Main program entry. */
main(String[] argv)2009     public static void main(String[] argv) throws Exception {
2010 
2011         for (int i = 0; i < argv.length; i++) {
2012             final String expression = argv[i];
2013             System.out.println("# XPath expression: \""+expression+'"');
2014             try {
2015                 SymbolTable symbolTable = new SymbolTable();
2016                 XPath xpath = new XPath(expression, symbolTable, null);
2017                 System.out.println("expanded xpath: \""+xpath.toString()+'"');
2018             }
2019             catch (XPathException e) {
2020                 System.out.println("error: "+e.getMessage());
2021             }
2022         }
2023 
2024     } // main(String[])
2025 
2026 } // class XPath
2027