1 /*
2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
3  */
4 /*
5  * Licensed to the Apache Software Foundation (ASF) under one or more
6  * contributor license agreements.  See the NOTICE file distributed with
7  * this work for additional information regarding copyright ownership.
8  * The ASF licenses this file to You under the Apache License, Version 2.0
9  * (the "License"); you may not use this file except in compliance with
10  * the License.  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 package com.sun.org.apache.xpath.internal.compiler;
22 
23 import com.sun.org.apache.xml.internal.utils.PrefixResolver;
24 import com.sun.org.apache.xpath.internal.res.XPATHErrorResources;
25 import java.util.List;
26 
27 /**
28  * This class is in charge of lexical processing of the XPath
29  * expression into tokens.
30  *
31  * @LastModified: Nov 2017
32  */
33 class Lexer
34 {
35 
36   /**
37    * The target XPath.
38    */
39   private Compiler m_compiler;
40 
41   /**
42    * The prefix resolver to map prefixes to namespaces in the XPath.
43    */
44   PrefixResolver m_namespaceContext;
45 
46   /**
47    * The XPath processor object.
48    */
49   XPathParser m_processor;
50 
51   /**
52    * This value is added to each element name in the TARGETEXTRA
53    * that is a 'target' (right-most top-level element name).
54    */
55   static final int TARGETEXTRA = 10000;
56 
57   /**
58    * Ignore this, it is going away.
59    * This holds a map to the m_tokenQueue that tells where the top-level elements are.
60    * It is used for pattern matching so the m_tokenQueue can be walked backwards.
61    * Each element that is a 'target', (right-most top level element name) has
62    * TARGETEXTRA added to it.
63    *
64    */
65   private int m_patternMap[] = new int[100];
66 
67   /**
68    * Ignore this, it is going away.
69    * The number of elements that m_patternMap maps;
70    */
71   private int m_patternMapSize;
72 
73   /**
74    * Create a Lexer object.
75    *
76    * @param compiler The owning compiler for this lexer.
77    * @param resolver The prefix resolver for mapping qualified name prefixes
78    *                 to namespace URIs.
79    * @param xpathProcessor The parser that is processing strings to opcodes.
80    */
Lexer(Compiler compiler, PrefixResolver resolver, XPathParser xpathProcessor)81   Lexer(Compiler compiler, PrefixResolver resolver,
82         XPathParser xpathProcessor)
83   {
84 
85     m_compiler = compiler;
86     m_namespaceContext = resolver;
87     m_processor = xpathProcessor;
88   }
89 
90   /**
91    * Walk through the expression and build a token queue, and a map of the top-level
92    * elements.
93    * @param pat XSLT Expression.
94    *
95    * @throws javax.xml.transform.TransformerException
96    */
tokenize(String pat)97   void tokenize(String pat) throws javax.xml.transform.TransformerException
98   {
99     tokenize(pat, null);
100   }
101 
102   /**
103    * Walk through the expression and build a token queue, and a map of the top-level
104    * elements.
105    * @param pat XSLT Expression.
106    * @param targetStrings a list to hold Strings, may be null.
107    *
108    * @throws javax.xml.transform.TransformerException
109    */
110   @SuppressWarnings("fallthrough") // on purpose at case '-', '(' and default
tokenize(String pat, List<String> targetStrings)111   void tokenize(String pat, List<String> targetStrings)
112           throws javax.xml.transform.TransformerException
113   {
114 
115     m_compiler.m_currentPattern = pat;
116     m_patternMapSize = 0;
117 
118     // This needs to grow too.
119     m_compiler.m_opMap = new OpMapVector(OpMap.MAXTOKENQUEUESIZE * 5, OpMap.BLOCKTOKENQUEUESIZE * 5, OpMap.MAPINDEX_LENGTH);
120 
121     int nChars = pat.length();
122     int startSubstring = -1;
123     int posOfNSSep = -1;
124     boolean isStartOfPat = true;
125     boolean isAttrName = false;
126     boolean isNum = false;
127 
128     // Nesting of '[' so we can know if the given element should be
129     // counted inside the m_patternMap.
130     int nesting = 0;
131 
132     // char[] chars = pat.toCharArray();
133     for (int i = 0; i < nChars; i++)
134     {
135       char c = pat.charAt(i);
136 
137       switch (c)
138       {
139       case '\"' :
140       {
141         if (startSubstring != -1)
142         {
143           isNum = false;
144           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
145           isAttrName = false;
146 
147           if (-1 != posOfNSSep)
148           {
149             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
150           }
151           else
152           {
153             addToTokenQueue(pat.substring(startSubstring, i));
154           }
155         }
156 
157         startSubstring = i;
158 
159         for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\"'); i++);
160 
161         if (c == '\"' && i < nChars)
162         {
163           addToTokenQueue(pat.substring(startSubstring, i + 1));
164 
165           startSubstring = -1;
166         }
167         else
168         {
169           m_processor.error(XPATHErrorResources.ER_EXPECTED_DOUBLE_QUOTE,
170                             null);  //"misquoted literal... expected double quote!");
171         }
172       }
173       break;
174       case '\'' :
175         if (startSubstring != -1)
176         {
177           isNum = false;
178           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
179           isAttrName = false;
180 
181           if (-1 != posOfNSSep)
182           {
183             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
184           }
185           else
186           {
187             addToTokenQueue(pat.substring(startSubstring, i));
188           }
189         }
190 
191         startSubstring = i;
192 
193         for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\''); i++);
194 
195         if (c == '\'' && i < nChars)
196         {
197           addToTokenQueue(pat.substring(startSubstring, i + 1));
198 
199           startSubstring = -1;
200         }
201         else
202         {
203           m_processor.error(XPATHErrorResources.ER_EXPECTED_SINGLE_QUOTE,
204                             null);  //"misquoted literal... expected single quote!");
205         }
206         break;
207       case 0x0A :
208       case 0x0D :
209       case ' ' :
210       case '\t' :
211         if (startSubstring != -1)
212         {
213           isNum = false;
214           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
215           isAttrName = false;
216 
217           if (-1 != posOfNSSep)
218           {
219             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
220           }
221           else
222           {
223             addToTokenQueue(pat.substring(startSubstring, i));
224           }
225 
226           startSubstring = -1;
227         }
228         break;
229       case '@' :
230         isAttrName = true;
231 
232       // fall-through on purpose
233       case '-' :
234         if ('-' == c)
235         {
236           if (!(isNum || (startSubstring == -1)))
237           {
238             break;
239           }
240 
241           isNum = false;
242         }
243 
244       // fall-through on purpose
245       case '(' :
246       case '[' :
247       case ')' :
248       case ']' :
249       case '|' :
250       case '/' :
251       case '*' :
252       case '+' :
253       case '=' :
254       case ',' :
255       case '\\' :  // Unused at the moment
256       case '^' :  // Unused at the moment
257       case '!' :  // Unused at the moment
258       case '$' :
259       case '<' :
260       case '>' :
261         if (startSubstring != -1)
262         {
263           isNum = false;
264           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
265           isAttrName = false;
266 
267           if (-1 != posOfNSSep)
268           {
269             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
270           }
271           else
272           {
273             addToTokenQueue(pat.substring(startSubstring, i));
274           }
275 
276           startSubstring = -1;
277         }
278         else if (('/' == c) && isStartOfPat)
279         {
280           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
281         }
282         else if ('*' == c)
283         {
284           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
285           isAttrName = false;
286         }
287 
288         if (0 == nesting)
289         {
290           if ('|' == c)
291           {
292             if (null != targetStrings)
293             {
294               recordTokenString(targetStrings);
295             }
296 
297             isStartOfPat = true;
298           }
299         }
300 
301         if ((')' == c) || (']' == c))
302         {
303           nesting--;
304         }
305         else if (('(' == c) || ('[' == c))
306         {
307           nesting++;
308         }
309 
310         addToTokenQueue(pat.substring(i, i + 1));
311         break;
312       case ':' :
313         if (i>0)
314         {
315           if (posOfNSSep == (i - 1))
316           {
317             if (startSubstring != -1)
318             {
319               if (startSubstring < (i - 1))
320                 addToTokenQueue(pat.substring(startSubstring, i - 1));
321             }
322 
323             isNum = false;
324             isAttrName = false;
325             startSubstring = -1;
326             posOfNSSep = -1;
327 
328             addToTokenQueue(pat.substring(i - 1, i + 1));
329 
330             break;
331           }
332           else
333           {
334             posOfNSSep = i;
335           }
336         }
337 
338       // fall through on purpose
339       default :
340         if (-1 == startSubstring)
341         {
342           startSubstring = i;
343           isNum = Character.isDigit(c);
344         }
345         else if (isNum)
346         {
347           isNum = Character.isDigit(c);
348         }
349       }
350     }
351 
352     if (startSubstring != -1)
353     {
354       isNum = false;
355       isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
356 
357       if ((-1 != posOfNSSep) ||
358          ((m_namespaceContext != null) && (m_namespaceContext.handlesNullPrefixes())))
359       {
360         posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, nChars);
361       }
362       else
363       {
364         addToTokenQueue(pat.substring(startSubstring, nChars));
365       }
366     }
367 
368     if (0 == m_compiler.getTokenQueueSize())
369     {
370       m_processor.error(XPATHErrorResources.ER_EMPTY_EXPRESSION, null);  //"Empty expression!");
371     }
372     else if (null != targetStrings)
373     {
374       recordTokenString(targetStrings);
375     }
376 
377     m_processor.m_queueMark = 0;
378   }
379 
380   /**
381    * Record the current position on the token queue as long as
382    * this is a top-level element.  Must be called before the
383    * next token is added to the m_tokenQueue.
384    *
385    * @param nesting The nesting count for the pattern element.
386    * @param isStart true if this is the start of a pattern.
387    * @param isAttrName true if we have determined that this is an attribute name.
388    *
389    * @return true if this is the start of a pattern.
390    */
mapPatternElemPos(int nesting, boolean isStart, boolean isAttrName)391   private boolean mapPatternElemPos(int nesting, boolean isStart,
392                                     boolean isAttrName)
393   {
394 
395     if (0 == nesting)
396     {
397       if(m_patternMapSize >= m_patternMap.length)
398       {
399         int patternMap[] = m_patternMap;
400         int len = m_patternMap.length;
401         m_patternMap = new int[m_patternMapSize + 100];
402         System.arraycopy(patternMap, 0, m_patternMap, 0, len);
403       }
404       if (!isStart)
405       {
406         m_patternMap[m_patternMapSize - 1] -= TARGETEXTRA;
407       }
408       m_patternMap[m_patternMapSize] =
409         (m_compiler.getTokenQueueSize() - (isAttrName ? 1 : 0)) + TARGETEXTRA;
410 
411       m_patternMapSize++;
412 
413       isStart = false;
414     }
415 
416     return isStart;
417   }
418 
419   /**
420    * Given a map pos, return the corresponding token queue pos.
421    *
422    * @param i The index in the m_patternMap.
423    *
424    * @return the token queue position.
425    */
getTokenQueuePosFromMap(int i)426   private int getTokenQueuePosFromMap(int i)
427   {
428 
429     int pos = m_patternMap[i];
430 
431     return (pos >= TARGETEXTRA) ? (pos - TARGETEXTRA) : pos;
432   }
433 
434   /**
435    * Reset token queue mark and m_token to a
436    * given position.
437    * @param mark The new position.
438    */
resetTokenMark(int mark)439   private final void resetTokenMark(int mark)
440   {
441 
442     int qsz = m_compiler.getTokenQueueSize();
443 
444     m_processor.m_queueMark = (mark > 0)
445                               ? ((mark <= qsz) ? mark - 1 : mark) : 0;
446 
447     if (m_processor.m_queueMark < qsz)
448     {
449       m_processor.m_token =
450         (String) m_compiler.getTokenQueue().elementAt(m_processor.m_queueMark++);
451       m_processor.m_tokenChar = m_processor.m_token.charAt(0);
452     }
453     else
454     {
455       m_processor.m_token = null;
456       m_processor.m_tokenChar = 0;
457     }
458   }
459 
460   /**
461    * Given a string, return the corresponding keyword token.
462    *
463    * @param key The keyword.
464    *
465    * @return An opcode value.
466    */
getKeywordToken(String key)467   final int getKeywordToken(String key)
468   {
469 
470     int tok;
471 
472     try
473     {
474       Integer itok = Keywords.getKeyWord(key);
475 
476       tok = (null != itok) ? itok.intValue() : 0;
477     }
478     catch (NullPointerException npe)
479     {
480       tok = 0;
481     }
482     catch (ClassCastException cce)
483     {
484       tok = 0;
485     }
486 
487     return tok;
488   }
489 
490   /**
491    * Record the current token in the passed vector.
492    *
493    * @param targetStrings a list of strings.
494    */
recordTokenString(List<String> targetStrings)495   private void recordTokenString(List<String> targetStrings)
496   {
497 
498     int tokPos = getTokenQueuePosFromMap(m_patternMapSize - 1);
499 
500     resetTokenMark(tokPos + 1);
501 
502     if (m_processor.lookahead('(', 1))
503     {
504       int tok = getKeywordToken(m_processor.m_token);
505 
506       switch (tok)
507       {
508       case OpCodes.NODETYPE_COMMENT :
509         targetStrings.add(PsuedoNames.PSEUDONAME_COMMENT);
510         break;
511       case OpCodes.NODETYPE_TEXT :
512         targetStrings.add(PsuedoNames.PSEUDONAME_TEXT);
513         break;
514       case OpCodes.NODETYPE_NODE :
515         targetStrings.add(PsuedoNames.PSEUDONAME_ANY);
516         break;
517       case OpCodes.NODETYPE_ROOT :
518         targetStrings.add(PsuedoNames.PSEUDONAME_ROOT);
519         break;
520       case OpCodes.NODETYPE_ANYELEMENT :
521         targetStrings.add(PsuedoNames.PSEUDONAME_ANY);
522         break;
523       case OpCodes.NODETYPE_PI :
524         targetStrings.add(PsuedoNames.PSEUDONAME_ANY);
525         break;
526       default :
527         targetStrings.add(PsuedoNames.PSEUDONAME_ANY);
528       }
529     }
530     else
531     {
532       if (m_processor.tokenIs('@'))
533       {
534         tokPos++;
535 
536         resetTokenMark(tokPos + 1);
537       }
538 
539       if (m_processor.lookahead(':', 1))
540       {
541         tokPos += 2;
542       }
543 
544       targetStrings.add((String)m_compiler.getTokenQueue().elementAt(tokPos));
545     }
546   }
547 
548   /**
549    * Add a token to the token queue.
550    *
551    *
552    * @param s The token.
553    */
addToTokenQueue(String s)554   private final void addToTokenQueue(String s)
555   {
556     m_compiler.getTokenQueue().addElement(s);
557   }
558 
559   /**
560    * When a seperator token is found, see if there's a element name or
561    * the like to map.
562    *
563    * @param pat The XPath name string.
564    * @param startSubstring The start of the name string.
565    * @param posOfNSSep The position of the namespace seperator (':').
566    * @param posOfScan The end of the name index.
567    *
568    * @throws javax.xml.transform.TransformerException
569    *
570    * @return -1 always.
571    */
mapNSTokens(String pat, int startSubstring, int posOfNSSep, int posOfScan)572   private int mapNSTokens(String pat, int startSubstring, int posOfNSSep,
573                           int posOfScan)
574            throws javax.xml.transform.TransformerException
575  {
576 
577     String prefix = "";
578 
579     if ((startSubstring >= 0) && (posOfNSSep >= 0))
580     {
581        prefix = pat.substring(startSubstring, posOfNSSep);
582     }
583     String uName;
584 
585     if ((null != m_namespaceContext) &&!prefix.equals("*")
586             &&!prefix.equals("xmlns"))
587     {
588       try
589       {
590         if (prefix.length() > 0)
591           uName = m_namespaceContext.getNamespaceForPrefix(prefix);
592         else
593         {
594 
595           // Assume last was wildcard. This is not legal according
596           // to the draft. Set the below to true to make namespace
597           // wildcards work.
598           if (false)
599           {
600             addToTokenQueue(":");
601 
602             String s = pat.substring(posOfNSSep + 1, posOfScan);
603 
604             if (s.length() > 0)
605               addToTokenQueue(s);
606 
607             return -1;
608           }
609           else
610           {
611             uName = m_namespaceContext.getNamespaceForPrefix(prefix);
612           }
613         }
614       }
615       catch (ClassCastException cce)
616       {
617         uName = m_namespaceContext.getNamespaceForPrefix(prefix);
618       }
619     }
620     else
621     {
622       uName = prefix;
623     }
624 
625     if ((null != uName) && (uName.length() > 0))
626     {
627       addToTokenQueue(uName);
628       addToTokenQueue(":");
629 
630       String s = pat.substring(posOfNSSep + 1, posOfScan);
631 
632       if (s.length() > 0)
633         addToTokenQueue(s);
634     }
635     else
636     {
637         m_processor.error(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
638                 new String[] {prefix});  //"Prefix must resolve to a namespace: {0}";
639     }
640 
641     return -1;
642   }
643 }
644