1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 /* 19 * $Id$ 20 */ 21 22 #if !defined(XERCESC_INCLUDE_GUARD_REGULAREXPRESSION_HPP) 23 #define XERCESC_INCLUDE_GUARD_REGULAREXPRESSION_HPP 24 25 // --------------------------------------------------------------------------- 26 // Includes 27 // --------------------------------------------------------------------------- 28 #include <xercesc/util/RefArrayVectorOf.hpp> 29 #include <xercesc/util/XMLString.hpp> 30 #include <xercesc/util/Janitor.hpp> 31 #include <xercesc/util/regx/Op.hpp> 32 #include <xercesc/util/regx/TokenFactory.hpp> 33 #include <xercesc/util/regx/BMPattern.hpp> 34 #include <xercesc/util/regx/OpFactory.hpp> 35 #include <xercesc/util/regx/RegxUtil.hpp> 36 37 XERCES_CPP_NAMESPACE_BEGIN 38 39 // --------------------------------------------------------------------------- 40 // Forward Declaration 41 // --------------------------------------------------------------------------- 42 class RangeToken; 43 class Match; 44 class RegxParser; 45 46 /** 47 * The RegularExpression class represents a parsed executable regular expression. 48 * This class is thread safe. Two similar regular expression syntaxes are 49 * supported: 50 * 51 * <ol> 52 * <li><a href="http://www.w3.org/TR/xpath-functions/#regex-syntax">The XPath 2.0 / XQuery regular expression syntax.</a> 53 * <li><a href="http://www.w3.org/TR/xmlschema-2/#regexs">The XML Schema regular expression syntax.</a></li> 54 * </ol> 55 * 56 * XPath 2.0 regular expression syntax is used unless the "X" option is specified during construction. 57 * 58 * Options can be specified during construction to change the way that the regular expression is handled. 59 * Options are specified by a string consisting of any number of the following characters: 60 * 61 * <table border='1'> 62 * <tr> 63 * <th>Character</th> 64 * <th>Meaning</th> 65 * </tr> 66 * <tr> 67 * <td valign='top' rowspan='1' colspan='1'>i</td> 68 * <td valign='top' rowspan='1' colspan='1'><a href="http://www.w3.org/TR/xpath-functions/#flags"> 69 * Ignore case</a> when matching the regular expression.</td> 70 * </tr> 71 * <tr> 72 * <td valign='top' rowspan='1' colspan='1'>m</td> 73 * <td valign='top' rowspan='1' colspan='1'><a href="http://www.w3.org/TR/xpath-functions/#flags"> 74 * Multi-line mode</a>. The meta characters "^" and "$" will match the beginning and end of lines.</td> 75 * </tr> 76 * <tr> 77 * <td valign='top' rowspan='1' colspan='1'>s</td> 78 * <td valign='top' rowspan='1' colspan='1'><a href="http://www.w3.org/TR/xpath-functions/#flags"> 79 * Single-line mode</a>. The meta character "." will match a newline character.</td> 80 * </tr> 81 * <tr> 82 * <td valign='top' rowspan='1' colspan='1'>x</td> 83 * <td valign='top' rowspan='1' colspan='1'>Allow extended comments.</td> 84 * </tr> 85 * <tr> 86 * <td valign='top' rowspan='1' colspan='1'>F</td> 87 * <td valign='top' rowspan='1' colspan='1'>Prohibit the fixed string optimization.</td> 88 * </tr> 89 * <tr> 90 * <td valign='top' rowspan='1' colspan='1'>H</td> 91 * <td valign='top' rowspan='1' colspan='1'>Prohibit the head character optimization.</td> 92 * </tr> 93 * <tr> 94 * <td valign='top' rowspan='1' colspan='1'>X</td> 95 * <td valign='top' rowspan='1' colspan='1'>Parse the regular expression according to the 96 * <a href="http://www.w3.org/TR/xmlschema-2/#regexs">XML Schema regular expression syntax</a>.</td> 97 * </tr> 98 * </table> 99 */ 100 class XMLUTIL_EXPORT RegularExpression : public XMemory 101 { 102 public: 103 // ----------------------------------------------------------------------- 104 // Public Constructors and Destructor 105 // ----------------------------------------------------------------------- 106 107 /** @name Constructors and destructor */ 108 //@{ 109 110 /** Parses the given regular expression. 111 * 112 * @param pattern the regular expression in the local code page 113 * @param manager the memory manager to use 114 */ 115 RegularExpression 116 ( 117 const char* const pattern 118 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager 119 ); 120 121 /** Parses the given regular expression using the options specified. 122 * 123 * @param pattern the regular expression in the local code page 124 * @param options the options string in the local code page 125 * @param manager the memory manager to use 126 */ 127 RegularExpression 128 ( 129 const char* const pattern 130 , const char* const options 131 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager 132 ); 133 134 /** Parses the given regular expression. 135 * 136 * @param pattern the regular expression 137 * @param manager the memory manager to use 138 */ 139 RegularExpression 140 ( 141 const XMLCh* const pattern 142 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager 143 ); 144 145 /** Parses the given regular expression using the options specified. 146 * 147 * @param pattern the regular expression 148 * @param options the options string 149 * @param manager the memory manager to use 150 */ 151 RegularExpression 152 ( 153 const XMLCh* const pattern 154 , const XMLCh* const options 155 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager 156 ); 157 158 virtual ~RegularExpression(); 159 160 //@} 161 162 // ----------------------------------------------------------------------- 163 // Public Constants 164 // ----------------------------------------------------------------------- 165 static const unsigned int IGNORE_CASE; 166 static const unsigned int SINGLE_LINE; 167 static const unsigned int MULTIPLE_LINE; 168 static const unsigned int EXTENDED_COMMENT; 169 static const unsigned int PROHIBIT_HEAD_CHARACTER_OPTIMIZATION; 170 static const unsigned int PROHIBIT_FIXED_STRING_OPTIMIZATION; 171 static const unsigned int XMLSCHEMA_MODE; 172 typedef enum 173 { 174 wordTypeIgnore = 0, 175 wordTypeLetter = 1, 176 wordTypeOther = 2 177 } wordType; 178 179 // ----------------------------------------------------------------------- 180 // Public Helper methods 181 // ----------------------------------------------------------------------- 182 183 /** @name Public helper methods */ 184 //@{ 185 186 static int getOptionValue(const XMLCh ch); 187 static bool isSet(const int options, const int flag); 188 189 //@} 190 191 // ----------------------------------------------------------------------- 192 // Matching methods 193 // ----------------------------------------------------------------------- 194 195 /** @name Matching methods */ 196 //@{ 197 198 /** Tries to match the given null terminated string against the regular expression, returning 199 * true if successful. 200 * 201 * @param matchString the string to match in the local code page 202 * @param manager the memory manager to use 203 * 204 * @return Whether the string matched the regular expression or not. 205 */ 206 bool matches(const char* const matchString, 207 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const; 208 209 /** Tries to match the given string between the specified start and end offsets 210 * against the regular expression, returning true if successful. 211 * 212 * @param matchString the string to match in the local code page 213 * @param start the offset of the start of the string 214 * @param end the offset of the end of the string 215 * @param manager the memory manager to use 216 * 217 * @return Whether the string matched the regular expression or not. 218 */ 219 bool matches(const char* const matchString, const XMLSize_t start, const XMLSize_t end, 220 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const; 221 222 /** Tries to match the given null terminated string against the regular expression, returning 223 * true if successful. 224 * 225 * @param matchString the string to match in the local code page 226 * @param pMatch a Match object, which will be populated with the offsets for the 227 * regular expression match and sub-matches. 228 * @param manager the memory manager to use 229 * 230 * @return Whether the string matched the regular expression or not. 231 */ 232 bool matches(const char* const matchString, Match* const pMatch, 233 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const; 234 235 /** Tries to match the given string between the specified start and end offsets 236 * against the regular expression, returning true if successful. 237 * 238 * @param matchString the string to match in the local code page 239 * @param start the offset of the start of the string 240 * @param end the offset of the end of the string 241 * @param pMatch a Match object, which will be populated with the offsets for the 242 * regular expression match and sub-matches. 243 * @param manager the memory manager to use 244 * 245 * @return Whether the string matched the regular expression or not. 246 */ 247 bool matches(const char* const matchString, const XMLSize_t start, const XMLSize_t end, 248 Match* const pMatch, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const; 249 250 /** Tries to match the given null terminated string against the regular expression, returning 251 * true if successful. 252 * 253 * @param matchString the string to match 254 * @param manager the memory manager to use 255 * 256 * @return Whether the string matched the regular expression or not. 257 */ 258 bool matches(const XMLCh* const matchString, 259 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const; 260 261 /** Tries to match the given string between the specified start and end offsets 262 * against the regular expression, returning true if successful. 263 * 264 * @param matchString the string to match 265 * @param start the offset of the start of the string 266 * @param end the offset of the end of the string 267 * @param manager the memory manager to use 268 * 269 * @return Whether the string matched the regular expression or not. 270 */ 271 bool matches(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end, 272 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const; 273 274 /** Tries to match the given null terminated string against the regular expression, returning 275 * true if successful. 276 * 277 * @param matchString the string to match 278 * @param pMatch a Match object, which will be populated with the offsets for the 279 * regular expression match and sub-matches. 280 * @param manager the memory manager to use 281 * 282 * @return Whether the string matched the regular expression or not. 283 */ 284 bool matches(const XMLCh* const matchString, Match* const pMatch, 285 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const; 286 287 /** Tries to match the given string between the specified start and end offsets 288 * against the regular expression, returning true if successful. 289 * 290 * @param matchString the string to match 291 * @param start the offset of the start of the string 292 * @param end the offset of the end of the string 293 * @param pMatch a Match object, which will be populated with the offsets for the 294 * regular expression match and sub-matches. 295 * @param manager the memory manager to use 296 * 297 * @return Whether the string matched the regular expression or not. 298 */ 299 bool matches(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end, 300 Match* const pMatch, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const; 301 302 /** Tries to match the given string between the specified start and end offsets 303 * against the regular expression. The subEx vector is populated with the details 304 * for every non-overlapping occurrence of a match in the string. 305 * 306 * @param matchString the string to match 307 * @param start the offset of the start of the string 308 * @param end the offset of the end of the string 309 * @param subEx a RefVectorOf Match objects, populated with the offsets for the 310 * regular expression match and sub-matches. 311 * @param manager the memory manager to use 312 */ 313 void allMatches(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end, 314 RefVectorOf<Match> *subEx, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const; 315 316 //@} 317 318 // ----------------------------------------------------------------------- 319 // Tokenize methods 320 // ----------------------------------------------------------------------- 321 // Note: The caller owns the string vector that is returned, and is responsible 322 // for deleting it. 323 324 /** @name Tokenize methods */ 325 //@{ 326 327 /** Tokenizes the null terminated string according to the regular expression, returning 328 * the parts of the string that do not match the regular expression. 329 * 330 * @param matchString the string to match in the local code page 331 * @param manager the memory manager to use 332 * 333 * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the 334 * given MemoryManager. The caller owns the string vector that is returned, and is responsible for 335 * deleting it. 336 */ 337 RefArrayVectorOf<XMLCh> *tokenize(const char* const matchString, 338 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const; 339 340 /** Tokenizes the string between the specified start and end offsets according to the regular 341 * expression, returning the parts of the string that do not match the regular expression. 342 * 343 * @param matchString the string to match in the local code page 344 * @param start the offset of the start of the string 345 * @param end the offset of the end of the string 346 * @param manager the memory manager to use 347 * 348 * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the 349 * given MemoryManager. The caller owns the string vector that is returned, and is responsible for 350 * deleting it. 351 */ 352 RefArrayVectorOf<XMLCh> *tokenize(const char* const matchString, const XMLSize_t start, const XMLSize_t end, 353 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const; 354 355 /** Tokenizes the null terminated string according to the regular expression, returning 356 * the parts of the string that do not match the regular expression. 357 * 358 * @param matchString the string to match 359 * @param manager the memory manager to use 360 * 361 * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the 362 * given MemoryManager. The caller owns the string vector that is returned, and is responsible for 363 * deleting it. 364 */ 365 RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const matchString, 366 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const; 367 368 /** Tokenizes the string between the specified start and end offsets according to the regular 369 * expression, returning the parts of the string that do not match the regular expression. 370 * 371 * @param matchString the string to match 372 * @param start the offset of the start of the string 373 * @param end the offset of the end of the string 374 * @param manager the memory manager to use 375 * 376 * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the 377 * given MemoryManager. The caller owns the string vector that is returned, and is responsible for 378 * deleting it. 379 */ 380 RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end, 381 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const; 382 383 //@} 384 385 // ----------------------------------------------------------------------- 386 // Replace methods 387 // ----------------------------------------------------------------------- 388 // Note: The caller owns the XMLCh* that is returned, and is responsible for 389 // deleting it. 390 391 /** @name Replace methods */ 392 //@{ 393 394 /** Performs a search and replace on the given null terminated string, replacing 395 * any substring that matches the regular expression with a string derived from 396 * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>. 397 * 398 * @param matchString the string to match in the local code page 399 * @param replaceString the string to replace in the local code page 400 * @param manager the memory manager to use 401 * 402 * @return The resulting string allocated using the given MemoryManager. The caller owns the string 403 * that is returned, and is responsible for deleting it. 404 */ 405 XMLCh *replace(const char* const matchString, const char* const replaceString, 406 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const; 407 408 /** Performs a search and replace on the given string between the specified start and end offsets, replacing 409 * any substring that matches the regular expression with a string derived from 410 * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>. 411 * 412 * @param matchString the string to match in the local code page 413 * @param replaceString the string to replace in the local code page 414 * @param start the offset of the start of the string 415 * @param end the offset of the end of the string 416 * @param manager the memory manager to use 417 * 418 * @return The resulting string allocated using the given MemoryManager. The caller owns the string 419 * that is returned, and is responsible for deleting it. 420 */ 421 XMLCh *replace(const char* const matchString, const char* const replaceString, 422 const XMLSize_t start, const XMLSize_t end, 423 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const; 424 425 /** Performs a search and replace on the given null terminated string, replacing 426 * any substring that matches the regular expression with a string derived from 427 * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>. 428 * 429 * @param matchString the string to match 430 * @param replaceString the string to replace 431 * @param manager the memory manager to use 432 * 433 * @return The resulting string allocated using the given MemoryManager. The caller owns the string 434 * that is returned, and is responsible for deleting it. 435 */ 436 XMLCh *replace(const XMLCh* const matchString, const XMLCh* const replaceString, 437 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const; 438 439 /** Performs a search and replace on the given string between the specified start and end offsets, replacing 440 * any substring that matches the regular expression with a string derived from 441 * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>. 442 * 443 * @param matchString the string to match 444 * @param replaceString the string to replace 445 * @param start the offset of the start of the string 446 * @param end the offset of the end of the string 447 * @param manager the memory manager to use 448 * 449 * @return The resulting string allocated using the given MemoryManager. The caller owns the string 450 * that is returned, and is responsible for deleting it. 451 */ 452 XMLCh *replace(const XMLCh* const matchString, const XMLCh* const replaceString, 453 const XMLSize_t start, const XMLSize_t end, 454 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const; 455 456 //@} 457 458 // ----------------------------------------------------------------------- 459 // Static initialize and cleanup methods 460 // ----------------------------------------------------------------------- 461 462 /** @name Static initilize and cleanup methods */ 463 //@{ 464 465 static void 466 staticInitialize(MemoryManager* memoryManager); 467 468 static void 469 staticCleanup(); 470 471 //@} 472 473 protected: 474 virtual RegxParser* getRegexParser(const int options, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager); 475 476 // ----------------------------------------------------------------------- 477 // Cleanup methods 478 // ----------------------------------------------------------------------- 479 void cleanUp(); 480 481 // ----------------------------------------------------------------------- 482 // Setter methods 483 // ----------------------------------------------------------------------- 484 void setPattern(const XMLCh* const pattern, const XMLCh* const options=0); 485 486 // ----------------------------------------------------------------------- 487 // Protected data types 488 // ----------------------------------------------------------------------- 489 class XMLUTIL_EXPORT Context : public XMemory 490 { 491 public : 492 Context(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager); 493 Context(Context* src); 494 ~Context(); 495 496 Context& operator= (const Context& other); getString() const497 inline const XMLCh* getString() const { return fString; } 498 void reset(const XMLCh* const string, const XMLSize_t stringLen, 499 const XMLSize_t start, const XMLSize_t limit, const int noClosures, 500 const unsigned int options); 501 bool nextCh(XMLInt32& ch, XMLSize_t& offset); 502 503 bool fAdoptMatch; 504 XMLSize_t fStart; 505 XMLSize_t fLimit; 506 XMLSize_t fLength; // fLimit - fStart 507 int fSize; 508 XMLSize_t fStringMaxLen; 509 int* fOffsets; 510 Match* fMatch; 511 const XMLCh* fString; 512 unsigned int fOptions; 513 MemoryManager* fMemoryManager; 514 }; 515 516 // ----------------------------------------------------------------------- 517 // Unimplemented constructors and operators 518 // ----------------------------------------------------------------------- 519 RegularExpression(const RegularExpression&); 520 RegularExpression& operator=(const RegularExpression&); 521 522 // ----------------------------------------------------------------------- 523 // Protected Helper methods 524 // ----------------------------------------------------------------------- 525 void prepare(); 526 int parseOptions(const XMLCh* const options); 527 528 /** 529 * Matching helpers 530 */ 531 int match(Context* const context, const Op* const operations, XMLSize_t offset) const; 532 bool matchIgnoreCase(const XMLInt32 ch1, const XMLInt32 ch2) const; 533 534 /** 535 * Helper methods used by match(Context* ...) 536 */ 537 bool matchChar(Context* const context, const XMLInt32 ch, XMLSize_t& offset, 538 const bool ignoreCase) const; 539 bool matchDot(Context* const context, XMLSize_t& offset) const; 540 bool matchRange(Context* const context, const Op* const op, 541 XMLSize_t& offset, const bool ignoreCase) const; 542 bool matchAnchor(Context* const context, const XMLInt32 ch, 543 const XMLSize_t offset) const; 544 bool matchBackReference(Context* const context, const XMLInt32 ch, 545 XMLSize_t& offset, const bool ignoreCase) const; 546 bool matchString(Context* const context, const XMLCh* const literal, 547 XMLSize_t& offset, const bool ignoreCase) const; 548 int matchUnion(Context* const context, const Op* const op, XMLSize_t offset) const; 549 int matchCapture(Context* const context, const Op* const op, XMLSize_t offset) const; 550 551 /** 552 * Replace helpers 553 */ 554 void subInExp(const XMLCh* const repString, 555 const XMLCh* const origString, 556 const Match* subEx, 557 XMLBuffer &result, 558 MemoryManager* const manager) const; 559 /** 560 * Converts a token tree into an operation tree 561 */ 562 void compile(const Token* const token); 563 Op* compile(const Token* const token, Op* const next, 564 const bool reverse); 565 /** 566 * Helper methods used by compile 567 */ 568 Op* compileUnion(const Token* const token, Op* const next, 569 const bool reverse); 570 Op* compileParenthesis(const Token* const token, Op* const next, 571 const bool reverse); 572 Op* compileConcat(const Token* const token, Op* const next, 573 const bool reverse); 574 Op* compileClosure(const Token* const token, Op* const next, 575 const bool reverse, const Token::tokType tkType); 576 577 bool doTokenOverlap(const Op* op, Token* token); 578 579 // ----------------------------------------------------------------------- 580 // Protected data members 581 // ----------------------------------------------------------------------- 582 bool fHasBackReferences; 583 bool fFixedStringOnly; 584 int fNoGroups; 585 XMLSize_t fMinLength; 586 unsigned int fNoClosures; 587 unsigned int fOptions; 588 const BMPattern* fBMPattern; 589 XMLCh* fPattern; 590 XMLCh* fFixedString; 591 const Op* fOperations; 592 Token* fTokenTree; 593 RangeToken* fFirstChar; 594 static RangeToken* fWordRange; 595 OpFactory fOpFactory; 596 TokenFactory* fTokenFactory; 597 MemoryManager* fMemoryManager; 598 }; 599 600 601 602 // ----------------------------------------------------------------------- 603 // RegularExpression: Static initialize and cleanup methods 604 // ----------------------------------------------------------------------- staticCleanup()605 inline void RegularExpression::staticCleanup() 606 { 607 fWordRange = 0; 608 } 609 610 // --------------------------------------------------------------------------- 611 // RegularExpression: Cleanup methods 612 // --------------------------------------------------------------------------- cleanUp()613 inline void RegularExpression::cleanUp() { 614 615 fMemoryManager->deallocate(fPattern);//delete [] fPattern; 616 fMemoryManager->deallocate(fFixedString);//delete [] fFixedString; 617 delete fBMPattern; 618 delete fTokenFactory; 619 } 620 621 // --------------------------------------------------------------------------- 622 // RegularExpression: Helper methods 623 // --------------------------------------------------------------------------- isSet(const int options,const int flag)624 inline bool RegularExpression::isSet(const int options, const int flag) { 625 626 return (options & flag) == flag; 627 } 628 629 compileUnion(const Token * const token,Op * const next,const bool reverse)630 inline Op* RegularExpression::compileUnion(const Token* const token, 631 Op* const next, 632 const bool reverse) { 633 634 XMLSize_t tokSize = token->size(); 635 UnionOp* uniOp = fOpFactory.createUnionOp(tokSize); 636 637 for (XMLSize_t i=0; i<tokSize; i++) { 638 639 uniOp->addElement(compile(token->getChild(i), next, reverse)); 640 } 641 642 return uniOp; 643 } 644 645 compileParenthesis(const Token * const token,Op * const next,const bool reverse)646 inline Op* RegularExpression::compileParenthesis(const Token* const token, 647 Op* const next, 648 const bool reverse) { 649 650 if (token->getNoParen() == 0) 651 return compile(token->getChild(0), next, reverse); 652 653 Op* captureOp = 0; 654 655 if (reverse) { 656 657 captureOp = fOpFactory.createCaptureOp(token->getNoParen(), next); 658 captureOp = compile(token->getChild(0), captureOp, reverse); 659 660 return fOpFactory.createCaptureOp(-token->getNoParen(), captureOp); 661 } 662 663 captureOp = fOpFactory.createCaptureOp(-token->getNoParen(), next); 664 captureOp = compile(token->getChild(0), captureOp, reverse); 665 666 return fOpFactory.createCaptureOp(token->getNoParen(), captureOp); 667 } 668 compileConcat(const Token * const token,Op * const next,const bool reverse)669 inline Op* RegularExpression::compileConcat(const Token* const token, 670 Op* const next, 671 const bool reverse) { 672 673 Op* ret = next; 674 XMLSize_t tokSize = token->size(); 675 676 if (!reverse) { 677 678 for (XMLSize_t i= tokSize; i>0; i--) { 679 ret = compile(token->getChild(i-1), ret, false); 680 } 681 } 682 else { 683 684 for (XMLSize_t i= 0; i< tokSize; i++) { 685 ret = compile(token->getChild(i), ret, true); 686 } 687 } 688 689 return ret; 690 } 691 compileClosure(const Token * const token,Op * const next,const bool reverse,const Token::tokType tkType)692 inline Op* RegularExpression::compileClosure(const Token* const token, 693 Op* const next, 694 const bool reverse, 695 const Token::tokType tkType) { 696 697 Op* ret = 0; 698 Token* childTok = token->getChild(0); 699 int min = token->getMin(); 700 int max = token->getMax(); 701 702 if (min >= 0 && min == max) { 703 704 ret = next; 705 for (int i=0; i< min; i++) { 706 ret = compile(childTok, ret, reverse); 707 } 708 709 return ret; 710 } 711 712 if (min > 0 && max > 0) 713 max -= min; 714 715 if (max > 0) { 716 717 ret = next; 718 for (int i=0; i<max; i++) { 719 720 ChildOp* childOp = fOpFactory.createQuestionOp( 721 tkType == Token::T_NONGREEDYCLOSURE); 722 723 childOp->setNextOp(next); 724 childOp->setChild(compile(childTok, ret, reverse)); 725 ret = childOp; 726 } 727 } 728 else { 729 730 ChildOp* childOp = 0; 731 732 if (tkType == Token::T_NONGREEDYCLOSURE) { 733 childOp = fOpFactory.createNonGreedyClosureOp(); 734 } 735 else { 736 737 if (childTok->getMinLength() == 0) 738 childOp = fOpFactory.createClosureOp(fNoClosures++); 739 else 740 childOp = fOpFactory.createClosureOp(-1); 741 } 742 743 childOp->setNextOp(next); 744 if(next==NULL || !doTokenOverlap(next, childTok)) 745 { 746 childOp->setOpType(tkType == Token::T_NONGREEDYCLOSURE?Op::O_FINITE_NONGREEDYCLOSURE:Op::O_FINITE_CLOSURE); 747 childOp->setChild(compile(childTok, NULL, reverse)); 748 } 749 else 750 { 751 childOp->setChild(compile(childTok, childOp, reverse)); 752 } 753 ret = childOp; 754 } 755 756 if (min > 0) { 757 758 for (int i=0; i< min; i++) { 759 ret = compile(childTok, ret, reverse); 760 } 761 } 762 763 return ret; 764 } 765 766 XERCES_CPP_NAMESPACE_END 767 768 #endif 769 /** 770 * End of file RegularExpression.hpp 771 */ 772 773