1 /* 2 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xpath.internal.compiler; 22 23 import com.sun.org.apache.xml.internal.utils.PrefixResolver; 24 import com.sun.org.apache.xpath.internal.res.XPATHErrorResources; 25 import java.util.List; 26 27 /** 28 * This class is in charge of lexical processing of the XPath 29 * expression into tokens. 30 * 31 * @LastModified: Nov 2017 32 */ 33 class Lexer 34 { 35 36 /** 37 * The target XPath. 38 */ 39 private Compiler m_compiler; 40 41 /** 42 * The prefix resolver to map prefixes to namespaces in the XPath. 43 */ 44 PrefixResolver m_namespaceContext; 45 46 /** 47 * The XPath processor object. 48 */ 49 XPathParser m_processor; 50 51 /** 52 * This value is added to each element name in the TARGETEXTRA 53 * that is a 'target' (right-most top-level element name). 54 */ 55 static final int TARGETEXTRA = 10000; 56 57 /** 58 * Ignore this, it is going away. 59 * This holds a map to the m_tokenQueue that tells where the top-level elements are. 60 * It is used for pattern matching so the m_tokenQueue can be walked backwards. 61 * Each element that is a 'target', (right-most top level element name) has 62 * TARGETEXTRA added to it. 63 * 64 */ 65 private int m_patternMap[] = new int[100]; 66 67 /** 68 * Ignore this, it is going away. 69 * The number of elements that m_patternMap maps; 70 */ 71 private int m_patternMapSize; 72 73 /** 74 * Create a Lexer object. 75 * 76 * @param compiler The owning compiler for this lexer. 77 * @param resolver The prefix resolver for mapping qualified name prefixes 78 * to namespace URIs. 79 * @param xpathProcessor The parser that is processing strings to opcodes. 80 */ Lexer(Compiler compiler, PrefixResolver resolver, XPathParser xpathProcessor)81 Lexer(Compiler compiler, PrefixResolver resolver, 82 XPathParser xpathProcessor) 83 { 84 85 m_compiler = compiler; 86 m_namespaceContext = resolver; 87 m_processor = xpathProcessor; 88 } 89 90 /** 91 * Walk through the expression and build a token queue, and a map of the top-level 92 * elements. 93 * @param pat XSLT Expression. 94 * 95 * @throws javax.xml.transform.TransformerException 96 */ tokenize(String pat)97 void tokenize(String pat) throws javax.xml.transform.TransformerException 98 { 99 tokenize(pat, null); 100 } 101 102 /** 103 * Walk through the expression and build a token queue, and a map of the top-level 104 * elements. 105 * @param pat XSLT Expression. 106 * @param targetStrings a list to hold Strings, may be null. 107 * 108 * @throws javax.xml.transform.TransformerException 109 */ 110 @SuppressWarnings("fallthrough") // on purpose at case '-', '(' and default tokenize(String pat, List<String> targetStrings)111 void tokenize(String pat, List<String> targetStrings) 112 throws javax.xml.transform.TransformerException 113 { 114 115 m_compiler.m_currentPattern = pat; 116 m_patternMapSize = 0; 117 118 // This needs to grow too. 119 m_compiler.m_opMap = new OpMapVector(OpMap.MAXTOKENQUEUESIZE * 5, OpMap.BLOCKTOKENQUEUESIZE * 5, OpMap.MAPINDEX_LENGTH); 120 121 int nChars = pat.length(); 122 int startSubstring = -1; 123 int posOfNSSep = -1; 124 boolean isStartOfPat = true; 125 boolean isAttrName = false; 126 boolean isNum = false; 127 128 // Nesting of '[' so we can know if the given element should be 129 // counted inside the m_patternMap. 130 int nesting = 0; 131 132 // char[] chars = pat.toCharArray(); 133 for (int i = 0; i < nChars; i++) 134 { 135 char c = pat.charAt(i); 136 137 switch (c) 138 { 139 case '\"' : 140 { 141 if (startSubstring != -1) 142 { 143 isNum = false; 144 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 145 isAttrName = false; 146 147 if (-1 != posOfNSSep) 148 { 149 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i); 150 } 151 else 152 { 153 addToTokenQueue(pat.substring(startSubstring, i)); 154 } 155 } 156 157 startSubstring = i; 158 159 for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\"'); i++); 160 161 if (c == '\"' && i < nChars) 162 { 163 addToTokenQueue(pat.substring(startSubstring, i + 1)); 164 165 startSubstring = -1; 166 } 167 else 168 { 169 m_processor.error(XPATHErrorResources.ER_EXPECTED_DOUBLE_QUOTE, 170 null); //"misquoted literal... expected double quote!"); 171 } 172 } 173 break; 174 case '\'' : 175 if (startSubstring != -1) 176 { 177 isNum = false; 178 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 179 isAttrName = false; 180 181 if (-1 != posOfNSSep) 182 { 183 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i); 184 } 185 else 186 { 187 addToTokenQueue(pat.substring(startSubstring, i)); 188 } 189 } 190 191 startSubstring = i; 192 193 for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\''); i++); 194 195 if (c == '\'' && i < nChars) 196 { 197 addToTokenQueue(pat.substring(startSubstring, i + 1)); 198 199 startSubstring = -1; 200 } 201 else 202 { 203 m_processor.error(XPATHErrorResources.ER_EXPECTED_SINGLE_QUOTE, 204 null); //"misquoted literal... expected single quote!"); 205 } 206 break; 207 case 0x0A : 208 case 0x0D : 209 case ' ' : 210 case '\t' : 211 if (startSubstring != -1) 212 { 213 isNum = false; 214 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 215 isAttrName = false; 216 217 if (-1 != posOfNSSep) 218 { 219 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i); 220 } 221 else 222 { 223 addToTokenQueue(pat.substring(startSubstring, i)); 224 } 225 226 startSubstring = -1; 227 } 228 break; 229 case '@' : 230 isAttrName = true; 231 232 // fall-through on purpose 233 case '-' : 234 if ('-' == c) 235 { 236 if (!(isNum || (startSubstring == -1))) 237 { 238 break; 239 } 240 241 isNum = false; 242 } 243 244 // fall-through on purpose 245 case '(' : 246 case '[' : 247 case ')' : 248 case ']' : 249 case '|' : 250 case '/' : 251 case '*' : 252 case '+' : 253 case '=' : 254 case ',' : 255 case '\\' : // Unused at the moment 256 case '^' : // Unused at the moment 257 case '!' : // Unused at the moment 258 case '$' : 259 case '<' : 260 case '>' : 261 if (startSubstring != -1) 262 { 263 isNum = false; 264 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 265 isAttrName = false; 266 267 if (-1 != posOfNSSep) 268 { 269 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i); 270 } 271 else 272 { 273 addToTokenQueue(pat.substring(startSubstring, i)); 274 } 275 276 startSubstring = -1; 277 } 278 else if (('/' == c) && isStartOfPat) 279 { 280 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 281 } 282 else if ('*' == c) 283 { 284 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 285 isAttrName = false; 286 } 287 288 if (0 == nesting) 289 { 290 if ('|' == c) 291 { 292 if (null != targetStrings) 293 { 294 recordTokenString(targetStrings); 295 } 296 297 isStartOfPat = true; 298 } 299 } 300 301 if ((')' == c) || (']' == c)) 302 { 303 nesting--; 304 } 305 else if (('(' == c) || ('[' == c)) 306 { 307 nesting++; 308 } 309 310 addToTokenQueue(pat.substring(i, i + 1)); 311 break; 312 case ':' : 313 if (i>0) 314 { 315 if (posOfNSSep == (i - 1)) 316 { 317 if (startSubstring != -1) 318 { 319 if (startSubstring < (i - 1)) 320 addToTokenQueue(pat.substring(startSubstring, i - 1)); 321 } 322 323 isNum = false; 324 isAttrName = false; 325 startSubstring = -1; 326 posOfNSSep = -1; 327 328 addToTokenQueue(pat.substring(i - 1, i + 1)); 329 330 break; 331 } 332 else 333 { 334 posOfNSSep = i; 335 } 336 } 337 338 // fall through on purpose 339 default : 340 if (-1 == startSubstring) 341 { 342 startSubstring = i; 343 isNum = Character.isDigit(c); 344 } 345 else if (isNum) 346 { 347 isNum = Character.isDigit(c); 348 } 349 } 350 } 351 352 if (startSubstring != -1) 353 { 354 isNum = false; 355 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 356 357 if ((-1 != posOfNSSep) || 358 ((m_namespaceContext != null) && (m_namespaceContext.handlesNullPrefixes()))) 359 { 360 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, nChars); 361 } 362 else 363 { 364 addToTokenQueue(pat.substring(startSubstring, nChars)); 365 } 366 } 367 368 if (0 == m_compiler.getTokenQueueSize()) 369 { 370 m_processor.error(XPATHErrorResources.ER_EMPTY_EXPRESSION, null); //"Empty expression!"); 371 } 372 else if (null != targetStrings) 373 { 374 recordTokenString(targetStrings); 375 } 376 377 m_processor.m_queueMark = 0; 378 } 379 380 /** 381 * Record the current position on the token queue as long as 382 * this is a top-level element. Must be called before the 383 * next token is added to the m_tokenQueue. 384 * 385 * @param nesting The nesting count for the pattern element. 386 * @param isStart true if this is the start of a pattern. 387 * @param isAttrName true if we have determined that this is an attribute name. 388 * 389 * @return true if this is the start of a pattern. 390 */ mapPatternElemPos(int nesting, boolean isStart, boolean isAttrName)391 private boolean mapPatternElemPos(int nesting, boolean isStart, 392 boolean isAttrName) 393 { 394 395 if (0 == nesting) 396 { 397 if(m_patternMapSize >= m_patternMap.length) 398 { 399 int patternMap[] = m_patternMap; 400 int len = m_patternMap.length; 401 m_patternMap = new int[m_patternMapSize + 100]; 402 System.arraycopy(patternMap, 0, m_patternMap, 0, len); 403 } 404 if (!isStart) 405 { 406 m_patternMap[m_patternMapSize - 1] -= TARGETEXTRA; 407 } 408 m_patternMap[m_patternMapSize] = 409 (m_compiler.getTokenQueueSize() - (isAttrName ? 1 : 0)) + TARGETEXTRA; 410 411 m_patternMapSize++; 412 413 isStart = false; 414 } 415 416 return isStart; 417 } 418 419 /** 420 * Given a map pos, return the corresponding token queue pos. 421 * 422 * @param i The index in the m_patternMap. 423 * 424 * @return the token queue position. 425 */ getTokenQueuePosFromMap(int i)426 private int getTokenQueuePosFromMap(int i) 427 { 428 429 int pos = m_patternMap[i]; 430 431 return (pos >= TARGETEXTRA) ? (pos - TARGETEXTRA) : pos; 432 } 433 434 /** 435 * Reset token queue mark and m_token to a 436 * given position. 437 * @param mark The new position. 438 */ resetTokenMark(int mark)439 private final void resetTokenMark(int mark) 440 { 441 442 int qsz = m_compiler.getTokenQueueSize(); 443 444 m_processor.m_queueMark = (mark > 0) 445 ? ((mark <= qsz) ? mark - 1 : mark) : 0; 446 447 if (m_processor.m_queueMark < qsz) 448 { 449 m_processor.m_token = 450 (String) m_compiler.getTokenQueue().elementAt(m_processor.m_queueMark++); 451 m_processor.m_tokenChar = m_processor.m_token.charAt(0); 452 } 453 else 454 { 455 m_processor.m_token = null; 456 m_processor.m_tokenChar = 0; 457 } 458 } 459 460 /** 461 * Given a string, return the corresponding keyword token. 462 * 463 * @param key The keyword. 464 * 465 * @return An opcode value. 466 */ getKeywordToken(String key)467 final int getKeywordToken(String key) 468 { 469 470 int tok; 471 472 try 473 { 474 Integer itok = Keywords.getKeyWord(key); 475 476 tok = (null != itok) ? itok.intValue() : 0; 477 } 478 catch (NullPointerException npe) 479 { 480 tok = 0; 481 } 482 catch (ClassCastException cce) 483 { 484 tok = 0; 485 } 486 487 return tok; 488 } 489 490 /** 491 * Record the current token in the passed vector. 492 * 493 * @param targetStrings a list of strings. 494 */ recordTokenString(List<String> targetStrings)495 private void recordTokenString(List<String> targetStrings) 496 { 497 498 int tokPos = getTokenQueuePosFromMap(m_patternMapSize - 1); 499 500 resetTokenMark(tokPos + 1); 501 502 if (m_processor.lookahead('(', 1)) 503 { 504 int tok = getKeywordToken(m_processor.m_token); 505 506 switch (tok) 507 { 508 case OpCodes.NODETYPE_COMMENT : 509 targetStrings.add(PsuedoNames.PSEUDONAME_COMMENT); 510 break; 511 case OpCodes.NODETYPE_TEXT : 512 targetStrings.add(PsuedoNames.PSEUDONAME_TEXT); 513 break; 514 case OpCodes.NODETYPE_NODE : 515 targetStrings.add(PsuedoNames.PSEUDONAME_ANY); 516 break; 517 case OpCodes.NODETYPE_ROOT : 518 targetStrings.add(PsuedoNames.PSEUDONAME_ROOT); 519 break; 520 case OpCodes.NODETYPE_ANYELEMENT : 521 targetStrings.add(PsuedoNames.PSEUDONAME_ANY); 522 break; 523 case OpCodes.NODETYPE_PI : 524 targetStrings.add(PsuedoNames.PSEUDONAME_ANY); 525 break; 526 default : 527 targetStrings.add(PsuedoNames.PSEUDONAME_ANY); 528 } 529 } 530 else 531 { 532 if (m_processor.tokenIs('@')) 533 { 534 tokPos++; 535 536 resetTokenMark(tokPos + 1); 537 } 538 539 if (m_processor.lookahead(':', 1)) 540 { 541 tokPos += 2; 542 } 543 544 targetStrings.add((String)m_compiler.getTokenQueue().elementAt(tokPos)); 545 } 546 } 547 548 /** 549 * Add a token to the token queue. 550 * 551 * 552 * @param s The token. 553 */ addToTokenQueue(String s)554 private final void addToTokenQueue(String s) 555 { 556 m_compiler.getTokenQueue().addElement(s); 557 } 558 559 /** 560 * When a seperator token is found, see if there's a element name or 561 * the like to map. 562 * 563 * @param pat The XPath name string. 564 * @param startSubstring The start of the name string. 565 * @param posOfNSSep The position of the namespace seperator (':'). 566 * @param posOfScan The end of the name index. 567 * 568 * @throws javax.xml.transform.TransformerException 569 * 570 * @return -1 always. 571 */ mapNSTokens(String pat, int startSubstring, int posOfNSSep, int posOfScan)572 private int mapNSTokens(String pat, int startSubstring, int posOfNSSep, 573 int posOfScan) 574 throws javax.xml.transform.TransformerException 575 { 576 577 String prefix = ""; 578 579 if ((startSubstring >= 0) && (posOfNSSep >= 0)) 580 { 581 prefix = pat.substring(startSubstring, posOfNSSep); 582 } 583 String uName; 584 585 if ((null != m_namespaceContext) &&!prefix.equals("*") 586 &&!prefix.equals("xmlns")) 587 { 588 try 589 { 590 if (prefix.length() > 0) 591 uName = m_namespaceContext.getNamespaceForPrefix(prefix); 592 else 593 { 594 595 // Assume last was wildcard. This is not legal according 596 // to the draft. Set the below to true to make namespace 597 // wildcards work. 598 if (false) 599 { 600 addToTokenQueue(":"); 601 602 String s = pat.substring(posOfNSSep + 1, posOfScan); 603 604 if (s.length() > 0) 605 addToTokenQueue(s); 606 607 return -1; 608 } 609 else 610 { 611 uName = m_namespaceContext.getNamespaceForPrefix(prefix); 612 } 613 } 614 } 615 catch (ClassCastException cce) 616 { 617 uName = m_namespaceContext.getNamespaceForPrefix(prefix); 618 } 619 } 620 else 621 { 622 uName = prefix; 623 } 624 625 if ((null != uName) && (uName.length() > 0)) 626 { 627 addToTokenQueue(uName); 628 addToTokenQueue(":"); 629 630 String s = pat.substring(posOfNSSep + 1, posOfScan); 631 632 if (s.length() > 0) 633 addToTokenQueue(s); 634 } 635 else 636 { 637 m_processor.error(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE, 638 new String[] {prefix}); //"Prefix must resolve to a namespace: {0}"; 639 } 640 641 return -1; 642 } 643 } 644