1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /*
19  * $Id$
20  */
21 
22 #if !defined(XERCESC_INCLUDE_GUARD_REGULAREXPRESSION_HPP)
23 #define XERCESC_INCLUDE_GUARD_REGULAREXPRESSION_HPP
24 
25 // ---------------------------------------------------------------------------
26 //  Includes
27 // ---------------------------------------------------------------------------
28 #include <xercesc/util/RefArrayVectorOf.hpp>
29 #include <xercesc/util/XMLString.hpp>
30 #include <xercesc/util/Janitor.hpp>
31 #include <xercesc/util/regx/Op.hpp>
32 #include <xercesc/util/regx/TokenFactory.hpp>
33 #include <xercesc/util/regx/BMPattern.hpp>
34 #include <xercesc/util/regx/OpFactory.hpp>
35 #include <xercesc/util/regx/RegxUtil.hpp>
36 
37 XERCES_CPP_NAMESPACE_BEGIN
38 
39 // ---------------------------------------------------------------------------
40 //  Forward Declaration
41 // ---------------------------------------------------------------------------
42 class RangeToken;
43 class Match;
44 class RegxParser;
45 
46 /**
47  * The RegularExpression class represents a parsed executable regular expression.
48  * This class is thread safe. Two similar regular expression syntaxes are
49  * supported:
50  *
51  * <ol>
 * <li><a href="http://www.w3.org/TR/xpath-functions/#regex-syntax">The XPath 2.0 / XQuery regular expression syntax.</a></li>
53  * <li><a href="http://www.w3.org/TR/xmlschema-2/#regexs">The XML Schema regular expression syntax.</a></li>
54  * </ol>
55  *
56  * XPath 2.0 regular expression syntax is used unless the "X" option is specified during construction.
57  *
58  * Options can be specified during construction to change the way that the regular expression is handled.
59  * Options are specified by a string consisting of any number of the following characters:
60  *
61  * <table border='1'>
62  * <tr>
63  * <th>Character</th>
64  * <th>Meaning</th>
65  * </tr>
66  * <tr>
67  * <td valign='top' rowspan='1' colspan='1'>i</td>
68  * <td valign='top' rowspan='1' colspan='1'><a href="http://www.w3.org/TR/xpath-functions/#flags">
69  * Ignore case</a> when matching the regular expression.</td>
70  * </tr>
71  * <tr>
72  * <td valign='top' rowspan='1' colspan='1'>m</td>
73  * <td valign='top' rowspan='1' colspan='1'><a href="http://www.w3.org/TR/xpath-functions/#flags">
74  * Multi-line mode</a>. The meta characters "^" and "$" will match the beginning and end of lines.</td>
75  * </tr>
76  * <tr>
77  * <td valign='top' rowspan='1' colspan='1'>s</td>
78  * <td valign='top' rowspan='1' colspan='1'><a href="http://www.w3.org/TR/xpath-functions/#flags">
79  * Single-line mode</a>. The meta character "." will match a newline character.</td>
80  * </tr>
81  * <tr>
82  * <td valign='top' rowspan='1' colspan='1'>x</td>
83  * <td valign='top' rowspan='1' colspan='1'>Allow extended comments.</td>
84  * </tr>
85  * <tr>
86  * <td valign='top' rowspan='1' colspan='1'>F</td>
87  * <td valign='top' rowspan='1' colspan='1'>Prohibit the fixed string optimization.</td>
88  * </tr>
89  * <tr>
90  * <td valign='top' rowspan='1' colspan='1'>H</td>
91  * <td valign='top' rowspan='1' colspan='1'>Prohibit the head character optimization.</td>
92  * </tr>
93  * <tr>
94  * <td valign='top' rowspan='1' colspan='1'>X</td>
95  * <td valign='top' rowspan='1' colspan='1'>Parse the regular expression according to the
96  * <a href="http://www.w3.org/TR/xmlschema-2/#regexs">XML Schema regular expression syntax</a>.</td>
97  * </tr>
98  * </table>
99  */
100 class XMLUTIL_EXPORT RegularExpression : public XMemory
101 {
102 public:
103     // -----------------------------------------------------------------------
104     //  Public Constructors and Destructor
105     // -----------------------------------------------------------------------
106 
107     /** @name Constructors and destructor */
108     //@{
109 
110     /** Parses the given regular expression.
111       *
112       * @param pattern the regular expression in the local code page
113       * @param manager the memory manager to use
114       */
115     RegularExpression
116     (
117         const char* const pattern
118         , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
119     );
120 
121     /** Parses the given regular expression using the options specified.
122       *
123       * @param pattern the regular expression in the local code page
124       * @param options the options string in the local code page
125       * @param manager the memory manager to use
126       */
127     RegularExpression
128     (
129         const char* const pattern
130         , const char* const options
131         , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
132     );
133 
134     /** Parses the given regular expression.
135       *
136       * @param pattern the regular expression
137       * @param manager the memory manager to use
138       */
139     RegularExpression
140     (
141         const XMLCh* const pattern
142         , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
143     );
144 
145     /** Parses the given regular expression using the options specified.
146       *
147       * @param pattern the regular expression
148       * @param options the options string
149       * @param manager the memory manager to use
150       */
151     RegularExpression
152     (
153         const XMLCh* const pattern
154         , const XMLCh* const options
155         , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
156     );
157 
158     virtual ~RegularExpression();
159 
160     //@}
161 
162     // -----------------------------------------------------------------------
163     //  Public Constants
164     // -----------------------------------------------------------------------
165     static const unsigned int   IGNORE_CASE;
166     static const unsigned int   SINGLE_LINE;
167     static const unsigned int   MULTIPLE_LINE;
168     static const unsigned int   EXTENDED_COMMENT;
169     static const unsigned int   PROHIBIT_HEAD_CHARACTER_OPTIMIZATION;
170     static const unsigned int   PROHIBIT_FIXED_STRING_OPTIMIZATION;
171     static const unsigned int   XMLSCHEMA_MODE;
172     typedef enum
173     {
174         wordTypeIgnore = 0,
175         wordTypeLetter = 1,
176         wordTypeOther = 2
177     } wordType;
178 
179     // -----------------------------------------------------------------------
180     //  Public Helper methods
181     // -----------------------------------------------------------------------
182 
183     /** @name Public helper methods */
184     //@{
185 
186     static int getOptionValue(const XMLCh ch);
187     static bool isSet(const int options, const int flag);
188 
189     //@}
190 
191     // -----------------------------------------------------------------------
192     //  Matching methods
193     // -----------------------------------------------------------------------
194 
195     /** @name Matching methods */
196     //@{
197 
198     /** Tries to match the given null terminated string against the regular expression, returning
199       * true if successful.
200       *
201       * @param matchString the string to match in the local code page
202       * @param manager     the memory manager to use
203       *
204       * @return Whether the string matched the regular expression or not.
205       */
206     bool matches(const char* const matchString,
207                  MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
208 
209     /** Tries to match the given string between the specified start and end offsets
210       * against the regular expression, returning true if successful.
211       *
212       * @param matchString the string to match in the local code page
213       * @param start       the offset of the start of the string
214       * @param end         the offset of the end of the string
215       * @param manager     the memory manager to use
216       *
217       * @return Whether the string matched the regular expression or not.
218       */
219     bool matches(const char* const matchString, const XMLSize_t start, const XMLSize_t end,
220                  MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
221 
222     /** Tries to match the given null terminated string against the regular expression, returning
223       * true if successful.
224       *
225       * @param matchString the string to match in the local code page
226       * @param pMatch      a Match object, which will be populated with the offsets for the
227       * regular expression match and sub-matches.
228       * @param manager     the memory manager to use
229       *
230       * @return Whether the string matched the regular expression or not.
231       */
232     bool matches(const char* const matchString, Match* const pMatch,
233                  MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
234 
235     /** Tries to match the given string between the specified start and end offsets
236       * against the regular expression, returning true if successful.
237       *
238       * @param matchString the string to match in the local code page
239       * @param start       the offset of the start of the string
240       * @param end         the offset of the end of the string
241       * @param pMatch      a Match object, which will be populated with the offsets for the
242       * regular expression match and sub-matches.
243       * @param manager     the memory manager to use
244       *
245       * @return Whether the string matched the regular expression or not.
246       */
247     bool matches(const char* const matchString, const XMLSize_t start, const XMLSize_t end,
248                  Match* const pMatch, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
249 
250     /** Tries to match the given null terminated string against the regular expression, returning
251       * true if successful.
252       *
253       * @param matchString the string to match
254       * @param manager     the memory manager to use
255       *
256       * @return Whether the string matched the regular expression or not.
257       */
258     bool matches(const XMLCh* const matchString,
259                  MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
260 
261     /** Tries to match the given string between the specified start and end offsets
262       * against the regular expression, returning true if successful.
263       *
264       * @param matchString the string to match
265       * @param start       the offset of the start of the string
266       * @param end         the offset of the end of the string
267       * @param manager     the memory manager to use
268       *
269       * @return Whether the string matched the regular expression or not.
270       */
271     bool matches(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end,
272                  MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
273 
274     /** Tries to match the given null terminated string against the regular expression, returning
275       * true if successful.
276       *
277       * @param matchString the string to match
278       * @param pMatch      a Match object, which will be populated with the offsets for the
279       * regular expression match and sub-matches.
280       * @param manager     the memory manager to use
281       *
282       * @return Whether the string matched the regular expression or not.
283       */
284     bool matches(const XMLCh* const matchString, Match* const pMatch,
285                  MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
286 
287     /** Tries to match the given string between the specified start and end offsets
288       * against the regular expression, returning true if successful.
289       *
290       * @param matchString the string to match
291       * @param start       the offset of the start of the string
292       * @param end         the offset of the end of the string
293       * @param pMatch      a Match object, which will be populated with the offsets for the
294       * regular expression match and sub-matches.
295       * @param manager     the memory manager to use
296       *
297       * @return Whether the string matched the regular expression or not.
298       */
299     bool matches(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end,
300                  Match* const pMatch, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
301 
302     /** Tries to match the given string between the specified start and end offsets
303       * against the regular expression. The subEx vector is populated with the details
304       * for every non-overlapping occurrence of a match in the string.
305       *
306       * @param matchString the string to match
307       * @param start       the offset of the start of the string
308       * @param end         the offset of the end of the string
309       * @param subEx       a RefVectorOf Match objects, populated with the offsets for the
310       * regular expression match and sub-matches.
311       * @param manager     the memory manager to use
312       */
313     void allMatches(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end,
314                     RefVectorOf<Match> *subEx, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
315 
316     //@}
317 
318     // -----------------------------------------------------------------------
319     //  Tokenize methods
320     // -----------------------------------------------------------------------
321     // Note: The caller owns the string vector that is returned, and is responsible
322     //       for deleting it.
323 
324     /** @name Tokenize methods */
325     //@{
326 
327     /** Tokenizes the null terminated string according to the regular expression, returning
328       * the parts of the string that do not match the regular expression.
329       *
330       * @param matchString the string to match in the local code page
331       * @param manager     the memory manager to use
332       *
333       * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the
334       * given MemoryManager. The caller owns the string vector that is returned, and is responsible for
335       * deleting it.
336       */
337     RefArrayVectorOf<XMLCh> *tokenize(const char* const matchString,
338                                       MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
339 
340     /** Tokenizes the string between the specified start and end offsets according to the regular
341       * expression, returning the parts of the string that do not match the regular expression.
342       *
343       * @param matchString the string to match in the local code page
344       * @param start       the offset of the start of the string
345       * @param end         the offset of the end of the string
346       * @param manager     the memory manager to use
347       *
348       * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the
349       * given MemoryManager. The caller owns the string vector that is returned, and is responsible for
350       * deleting it.
351       */
352     RefArrayVectorOf<XMLCh> *tokenize(const char* const matchString, const XMLSize_t start, const XMLSize_t end,
353                                       MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
354 
355     /** Tokenizes the null terminated string according to the regular expression, returning
356       * the parts of the string that do not match the regular expression.
357       *
358       * @param matchString the string to match
359       * @param manager     the memory manager to use
360       *
361       * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the
362       * given MemoryManager. The caller owns the string vector that is returned, and is responsible for
363       * deleting it.
364       */
365     RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const matchString,
366                                       MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
367 
368     /** Tokenizes the string between the specified start and end offsets according to the regular
369       * expression, returning the parts of the string that do not match the regular expression.
370       *
371       * @param matchString the string to match
372       * @param start       the offset of the start of the string
373       * @param end         the offset of the end of the string
374       * @param manager     the memory manager to use
375       *
376       * @return A RefArrayVectorOf sub-strings that do not match the regular expression allocated using the
377       * given MemoryManager. The caller owns the string vector that is returned, and is responsible for
378       * deleting it.
379       */
380     RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const matchString, const XMLSize_t start, const XMLSize_t end,
381                                       MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
382 
383     //@}
384 
385     // -----------------------------------------------------------------------
386     //  Replace methods
387     // -----------------------------------------------------------------------
388     // Note: The caller owns the XMLCh* that is returned, and is responsible for
389     //       deleting it.
390 
391     /** @name Replace methods */
392     //@{
393 
394     /** Performs a search and replace on the given null terminated string, replacing
395       * any substring that matches the regular expression with a string derived from
396       * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
397       *
398       * @param matchString   the string to match in the local code page
399       * @param replaceString the string to replace in the local code page
400       * @param manager       the memory manager to use
401       *
402       * @return The resulting string allocated using the given MemoryManager. The caller owns the string
403       * that is returned, and is responsible for deleting it.
404       */
405     XMLCh *replace(const char* const matchString, const char* const replaceString,
406                    MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
407 
408     /** Performs a search and replace on the given string between the specified start and end offsets, replacing
409       * any substring that matches the regular expression with a string derived from
410       * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
411       *
412       * @param matchString   the string to match in the local code page
413       * @param replaceString the string to replace in the local code page
414       * @param start         the offset of the start of the string
415       * @param end           the offset of the end of the string
416       * @param manager       the memory manager to use
417       *
418       * @return The resulting string allocated using the given MemoryManager. The caller owns the string
419       * that is returned, and is responsible for deleting it.
420       */
421     XMLCh *replace(const char* const matchString, const char* const replaceString,
422                    const XMLSize_t start, const XMLSize_t end,
423                    MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
424 
425     /** Performs a search and replace on the given null terminated string, replacing
426       * any substring that matches the regular expression with a string derived from
427       * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
428       *
429       * @param matchString   the string to match
430       * @param replaceString the string to replace
431       * @param manager       the memory manager to use
432       *
433       * @return The resulting string allocated using the given MemoryManager. The caller owns the string
434       * that is returned, and is responsible for deleting it.
435       */
436     XMLCh *replace(const XMLCh* const matchString, const XMLCh* const replaceString,
437                    MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
438 
439     /** Performs a search and replace on the given string between the specified start and end offsets, replacing
440       * any substring that matches the regular expression with a string derived from
441       * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
442       *
443       * @param matchString   the string to match
444       * @param replaceString the string to replace
445       * @param start         the offset of the start of the string
446       * @param end           the offset of the end of the string
447       * @param manager       the memory manager to use
448       *
449       * @return The resulting string allocated using the given MemoryManager. The caller owns the string
450       * that is returned, and is responsible for deleting it.
451       */
452     XMLCh *replace(const XMLCh* const matchString, const XMLCh* const replaceString,
453                    const XMLSize_t start, const XMLSize_t end,
454                    MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
455 
456     //@}
457 
458     // -----------------------------------------------------------------------
459     //  Static initialize and cleanup methods
460     // -----------------------------------------------------------------------
461 
462     /** @name Static initilize and cleanup methods */
463     //@{
464 
465     static void
466     staticInitialize(MemoryManager*  memoryManager);
467 
468     static void
469     staticCleanup();
470 
471     //@}
472 
473 protected:
474     virtual RegxParser* getRegexParser(const int options, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
475 
476     // -----------------------------------------------------------------------
477     //  Cleanup methods
478     // -----------------------------------------------------------------------
479     void cleanUp();
480 
481     // -----------------------------------------------------------------------
482     //  Setter methods
483     // -----------------------------------------------------------------------
484     void setPattern(const XMLCh* const pattern, const XMLCh* const options=0);
485 
486     // -----------------------------------------------------------------------
487     //  Protected data types
488     // -----------------------------------------------------------------------
489     class XMLUTIL_EXPORT Context : public XMemory
490     {
491         public :
492             Context(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
493             Context(Context* src);
494             ~Context();
495 
496             Context& operator= (const Context& other);
getString() const497             inline const XMLCh* getString() const { return fString; }
498             void reset(const XMLCh* const string, const XMLSize_t stringLen,
499                        const XMLSize_t start, const XMLSize_t limit, const int noClosures,
500                        const unsigned int options);
501             bool nextCh(XMLInt32& ch, XMLSize_t& offset);
502 
503             bool           fAdoptMatch;
504             XMLSize_t      fStart;
505             XMLSize_t      fLimit;
506             XMLSize_t      fLength;    // fLimit - fStart
507             int            fSize;
508             XMLSize_t      fStringMaxLen;
509             int*           fOffsets;
510             Match*         fMatch;
511             const XMLCh*   fString;
512             unsigned int   fOptions;
513             MemoryManager* fMemoryManager;
514     };
515 
516     // -----------------------------------------------------------------------
517     //  Unimplemented constructors and operators
518     // -----------------------------------------------------------------------
519     RegularExpression(const RegularExpression&);
520     RegularExpression& operator=(const RegularExpression&);
521 
522     // -----------------------------------------------------------------------
523     //  Protected Helper methods
524     // -----------------------------------------------------------------------
525     void prepare();
526     int parseOptions(const XMLCh* const options);
527 
528     /**
529       *    Matching helpers
530       */
531     int match(Context* const context, const Op* const operations, XMLSize_t offset) const;
532     bool matchIgnoreCase(const XMLInt32 ch1, const XMLInt32 ch2) const;
533 
534     /**
535       *    Helper methods used by match(Context* ...)
536       */
537     bool matchChar(Context* const context, const XMLInt32 ch, XMLSize_t& offset,
538                    const bool ignoreCase) const;
539     bool matchDot(Context* const context, XMLSize_t& offset) const;
540     bool matchRange(Context* const context, const Op* const op,
541                     XMLSize_t& offset, const bool ignoreCase) const;
542     bool matchAnchor(Context* const context, const XMLInt32 ch,
543                      const XMLSize_t offset) const;
544     bool matchBackReference(Context* const context, const XMLInt32 ch,
545                             XMLSize_t& offset, const bool ignoreCase) const;
546     bool matchString(Context* const context, const XMLCh* const literal,
547                      XMLSize_t& offset, const bool ignoreCase) const;
548     int  matchUnion(Context* const context, const Op* const op, XMLSize_t offset) const;
549     int matchCapture(Context* const context, const Op* const op, XMLSize_t offset) const;
550 
551     /**
552      *    Replace helpers
553      */
554     void subInExp(const XMLCh* const repString,
555                   const XMLCh* const origString,
556                   const Match* subEx,
557                   XMLBuffer &result,
558                   MemoryManager* const manager) const;
559     /**
560      *    Converts a token tree into an operation tree
561      */
562     void compile(const Token* const token);
563     Op*  compile(const Token* const token, Op* const next,
564                  const bool reverse);
565     /**
566       *    Helper methods used by compile
567       */
568     Op* compileUnion(const Token* const token, Op* const next,
569                      const bool reverse);
570     Op* compileParenthesis(const Token* const token, Op* const next,
571                            const bool reverse);
572     Op* compileConcat(const Token* const token, Op* const next,
573                       const bool reverse);
574     Op* compileClosure(const Token* const token, Op* const next,
575                        const bool reverse, const Token::tokType tkType);
576 
577     bool doTokenOverlap(const Op* op, Token* token);
578 
579     // -----------------------------------------------------------------------
580     //  Protected data members
581     // -----------------------------------------------------------------------
582     bool               fHasBackReferences;
583     bool               fFixedStringOnly;
584     int                fNoGroups;
585     XMLSize_t          fMinLength;
586     unsigned int       fNoClosures;
587     unsigned int       fOptions;
588     const BMPattern*   fBMPattern;
589     XMLCh*             fPattern;
590     XMLCh*             fFixedString;
591     const Op*          fOperations;
592     Token*             fTokenTree;
593     RangeToken*        fFirstChar;
594     static RangeToken* fWordRange;
595     OpFactory          fOpFactory;
596     TokenFactory*      fTokenFactory;
597     MemoryManager*     fMemoryManager;
598 };
599 
600 
601 
602   // -----------------------------------------------------------------------
603   //  RegularExpression: Static initialize and cleanup methods
604   // -----------------------------------------------------------------------
staticCleanup()605   inline void RegularExpression::staticCleanup()
606   {
607       fWordRange = 0;
608   }
609 
610   // ---------------------------------------------------------------------------
611   //  RegularExpression: Cleanup methods
612   // ---------------------------------------------------------------------------
cleanUp()613   inline void RegularExpression::cleanUp() {
614 
615       fMemoryManager->deallocate(fPattern);//delete [] fPattern;
616       fMemoryManager->deallocate(fFixedString);//delete [] fFixedString;
617       delete fBMPattern;
618       delete fTokenFactory;
619   }
620 
621   // ---------------------------------------------------------------------------
622   //  RegularExpression: Helper methods
623   // ---------------------------------------------------------------------------
isSet(const int options,const int flag)624   inline bool RegularExpression::isSet(const int options, const int flag) {
625 
626       return (options & flag) == flag;
627   }
628 
629 
compileUnion(const Token * const token,Op * const next,const bool reverse)630   inline Op* RegularExpression::compileUnion(const Token* const token,
631                                              Op* const next,
632                                              const bool reverse) {
633 
634       XMLSize_t tokSize = token->size();
635       UnionOp* uniOp = fOpFactory.createUnionOp(tokSize);
636 
637       for (XMLSize_t i=0; i<tokSize; i++) {
638 
639           uniOp->addElement(compile(token->getChild(i), next, reverse));
640       }
641 
642       return uniOp;
643   }
644 
645 
compileParenthesis(const Token * const token,Op * const next,const bool reverse)646   inline Op* RegularExpression::compileParenthesis(const Token* const token,
647                                                    Op* const next,
648                                                    const bool reverse) {
649 
650       if (token->getNoParen() == 0)
651           return compile(token->getChild(0), next, reverse);
652 
653       Op* captureOp    = 0;
654 
655       if (reverse) {
656 
657           captureOp = fOpFactory.createCaptureOp(token->getNoParen(), next);
658           captureOp = compile(token->getChild(0), captureOp, reverse);
659 
660           return fOpFactory.createCaptureOp(-token->getNoParen(), captureOp);
661       }
662 
663       captureOp = fOpFactory.createCaptureOp(-token->getNoParen(), next);
664       captureOp = compile(token->getChild(0), captureOp, reverse);
665 
666       return fOpFactory.createCaptureOp(token->getNoParen(), captureOp);
667   }
668 
compileConcat(const Token * const token,Op * const next,const bool reverse)669   inline Op* RegularExpression::compileConcat(const Token* const token,
670                                               Op*  const next,
671                                               const bool reverse) {
672 
673       Op* ret = next;
674       XMLSize_t tokSize = token->size();
675 
676       if (!reverse) {
677 
678           for (XMLSize_t i= tokSize; i>0; i--) {
679               ret = compile(token->getChild(i-1), ret, false);
680           }
681       }
682       else {
683 
684           for (XMLSize_t i= 0; i< tokSize; i++) {
685               ret = compile(token->getChild(i), ret, true);
686           }
687       }
688 
689       return ret;
690   }
691 
compileClosure(const Token * const token,Op * const next,const bool reverse,const Token::tokType tkType)692   inline Op* RegularExpression::compileClosure(const Token* const token,
693                                                Op* const next,
694                                                const bool reverse,
695                                                const Token::tokType tkType) {
696 
697       Op*    ret      = 0;
698       Token* childTok = token->getChild(0);
699       int    min      = token->getMin();
700       int    max      = token->getMax();
701 
702       if (min >= 0 && min == max) {
703 
704           ret = next;
705           for (int i=0; i< min; i++) {
706               ret = compile(childTok, ret, reverse);
707           }
708 
709           return ret;
710       }
711 
712       if (min > 0 && max > 0)
713           max -= min;
714 
715       if (max > 0) {
716 
717           ret = next;
718           for (int i=0; i<max; i++) {
719 
720               ChildOp* childOp = fOpFactory.createQuestionOp(
721                   tkType == Token::T_NONGREEDYCLOSURE);
722 
723               childOp->setNextOp(next);
724               childOp->setChild(compile(childTok, ret, reverse));
725               ret = childOp;
726           }
727       }
728       else {
729 
730           ChildOp* childOp = 0;
731 
732           if (tkType == Token::T_NONGREEDYCLOSURE) {
733               childOp = fOpFactory.createNonGreedyClosureOp();
734           }
735           else {
736 
737               if (childTok->getMinLength() == 0)
738                   childOp = fOpFactory.createClosureOp(fNoClosures++);
739               else
740                   childOp = fOpFactory.createClosureOp(-1);
741           }
742 
743           childOp->setNextOp(next);
744           if(next==NULL || !doTokenOverlap(next, childTok))
745           {
746               childOp->setOpType(tkType == Token::T_NONGREEDYCLOSURE?Op::O_FINITE_NONGREEDYCLOSURE:Op::O_FINITE_CLOSURE);
747               childOp->setChild(compile(childTok, NULL, reverse));
748           }
749           else
750           {
751               childOp->setChild(compile(childTok, childOp, reverse));
752           }
753           ret = childOp;
754       }
755 
756       if (min > 0) {
757 
758           for (int i=0; i< min; i++) {
759               ret = compile(childTok, ret, reverse);
760           }
761       }
762 
763       return ret;
764   }
765 
766 XERCES_CPP_NAMESPACE_END
767 
768 #endif
769 /**
770   * End of file RegularExpression.hpp
771   */
772 
773