1 // deelx.h
2 //
3 // DEELX Regular Expression Engine (v1.2)
4 //
5 // Copyright 2006 (c) RegExLab.com
6 // All Rights Reserved.
7 //
8 // http://www.regexlab.com/deelx/
9 //
// Author: 史寿伟 (sswater shi)
11 // sswater@gmail.com
12 //
13 // $Revision: 1.1.2.26 $
14 //
15
16 #ifndef __DEELX_REGEXP__H__
17 #define __DEELX_REGEXP__H__
18
19 #ifndef WIN32
20 #pragma GCC diagnostic ignored "-Wuninitialized"
21 #endif
22
23 #include <memory.h>
24 #include <ctype.h>
25 #include <limits.h>
26 #include <string.h>
27 #include <stdio.h>
28
29 //
30 // Data Reference
31 //
// CBufferRefT: a read-only, non-owning reference to a run of ELT elements.
// Only the pointer and the element count are stored; nothing is copied
// and nothing is freed by this class.
template <class ELT> class CBufferRefT
{
public:
    // Reference 'length' elements starting at pcsz.
    CBufferRefT(const ELT * pcsz, int length);

    // Reference a 0-terminated sequence; the size is counted up to the first 0.
    CBufferRefT(const ELT * pcsz);

    // Prefix comparisons: only the first GetSize() elements of pcsz are examined.
    // Return 0 when they all match, otherwise the difference of the first mismatch.
    int nCompare      (const ELT * pcsz) const;
    int nCompareNoCase(const ELT * pcsz) const;

    // Whole-string comparisons against a 0-terminated sequence: 0 means equal.
    int Compare      (const ELT * pcsz) const;
    int CompareNoCase(const ELT * pcsz) const;

    // Comparisons against another reference: 0 means same size and same content.
    int Compare      (const CBufferRefT <ELT> &) const;
    int CompareNoCase(const CBufferRefT <ELT> &) const;

    // Element access that yields 'def' (At) or 0 (operator []) past the end.
    ELT At          (int nIndex, ELT def = 0) const;
    ELT operator [] (int nIndex) const;

    // Referenced storage (never null, see GetBuffer) and element count.
    const ELT * GetBuffer() const;
    int         GetSize() const;

    virtual ~CBufferRefT();

// Content
protected:
    const ELT * m_pRef;   // first referenced element, may be 0
    int         m_nSize;  // number of referenced elements
};
60
61 //
// Implementation
63 //
CBufferRefT(const ELT * pcsz,int length)64 template <class ELT> CBufferRefT <ELT> :: CBufferRefT(const ELT * pcsz, int length)
65 {
66 m_pRef = pcsz;
67 m_nSize = length;
68 }
69
CBufferRefT(const ELT * pcsz)70 template <class ELT> CBufferRefT <ELT> :: CBufferRefT(const ELT * pcsz)
71 {
72 m_pRef = pcsz;
73 m_nSize = 0;
74
75 if(pcsz != 0) while(m_pRef[m_nSize] != 0) m_nSize ++;
76 }
77
nCompare(const ELT * pcsz)78 template <class ELT> int CBufferRefT <ELT> :: nCompare(const ELT * pcsz) const
79 {
80 for(int i=0; i<m_nSize; i++)
81 {
82 if(m_pRef[i] != pcsz[i])
83 return m_pRef[i] - pcsz[i];
84 }
85
86 return 0;
87 }
88
nCompareNoCase(const ELT * pcsz)89 template <class ELT> int CBufferRefT <ELT> :: nCompareNoCase(const ELT * pcsz) const
90 {
91 for(int i=0; i<m_nSize; i++)
92 {
93 if(m_pRef[i] != pcsz[i])
94 {
95 if(toupper((int)m_pRef[i]) != toupper((int)pcsz[i]))
96 return m_pRef[i] - pcsz[i];
97 }
98 }
99
100 return 0;
101 }
102
Compare(const ELT * pcsz)103 template <class ELT> inline int CBufferRefT <ELT> :: Compare(const ELT * pcsz) const
104 {
105 return nCompare(pcsz) ? 1 : (int)pcsz[m_nSize];
106 }
107
CompareNoCase(const ELT * pcsz)108 template <class ELT> inline int CBufferRefT <ELT> :: CompareNoCase(const ELT * pcsz) const
109 {
110 return nCompareNoCase(pcsz) ? 1 : (int)pcsz[m_nSize];
111 }
112
Compare(const CBufferRefT<ELT> & cref)113 template <class ELT> inline int CBufferRefT <ELT> :: Compare(const CBufferRefT <ELT> & cref) const
114 {
115 return m_nSize == cref.m_nSize ? nCompare(cref.GetBuffer()) : 1;
116 }
117
CompareNoCase(const CBufferRefT<ELT> & cref)118 template <class ELT> inline int CBufferRefT <ELT> :: CompareNoCase(const CBufferRefT <ELT> & cref) const
119 {
120 return m_nSize == cref.m_nSize ? nCompareNoCase(cref.GetBuffer()) : 1;
121 }
122
At(int nIndex,ELT def)123 template <class ELT> inline ELT CBufferRefT <ELT> :: At(int nIndex, ELT def) const
124 {
125 return nIndex >= m_nSize ? def : m_pRef[nIndex];
126 }
127
128 template <class ELT> inline ELT CBufferRefT <ELT> :: operator [] (int nIndex) const
129 {
130 return nIndex >= m_nSize ? 0 : m_pRef[nIndex];
131 }
132
GetBuffer()133 template <class ELT> const ELT * CBufferRefT <ELT> :: GetBuffer() const
134 {
135 static const ELT _def[] = {0}; return m_pRef ? m_pRef : _def;
136 }
137
GetSize()138 template <class ELT> inline int CBufferRefT <ELT> :: GetSize() const
139 {
140 return m_nSize;
141 }
142
~CBufferRefT()143 template <class ELT> CBufferRefT <ELT> :: ~CBufferRefT()
144 {
145 }
146
147 //
148 // Data Buffer
149 //
150 template <class ELT> class CBufferT : public CBufferRefT <ELT>
151 {
152 public:
153 CBufferT(const ELT * pcsz, int length);
154 CBufferT(const ELT * pcsz);
155 CBufferT();
156
157 public:
158 ELT & operator [] (int nIndex);
159 const ELT & operator [] (int nIndex) const;
160 void Append(const ELT * pcsz, int length, int eol = 0);
161 void Append(ELT el, int eol = 0);
162
163 public:
164 void Push(ELT el);
165 int Pop (ELT & el);
166 int Peek(ELT & el) const;
167
168 public:
169 const ELT * GetBuffer() const;
170 ELT * GetBuffer();
171 ELT * Detach();
172 void Release();
173 void Prepare(int index, int fill = 0);
174 void Restore(int size);
175
176 public:
177 virtual ~CBufferT();
178
179 // Content
180 protected:
181 ELT * m_pBuffer;
182 int m_nMaxLength;
183 };
184
185 //
// Implementation
187 //
CBufferT(const ELT * pcsz,int length)188 template <class ELT> CBufferT <ELT> :: CBufferT(const ELT * pcsz, int length) : CBufferRefT <ELT> (0, length)
189 {
190 m_nMaxLength = CBufferRefT <ELT> :: m_nSize + 1;
191
192 CBufferRefT <ELT> :: m_pRef = m_pBuffer = new ELT[m_nMaxLength];
193 memcpy(m_pBuffer, pcsz, sizeof(ELT) * CBufferRefT <ELT> :: m_nSize);
194 m_pBuffer[CBufferRefT <ELT> :: m_nSize] = 0;
195 }
196
CBufferT(const ELT * pcsz)197 template <class ELT> CBufferT <ELT> :: CBufferT(const ELT * pcsz) : CBufferRefT <ELT> (pcsz)
198 {
199 m_nMaxLength = CBufferRefT <ELT> :: m_nSize + 1;
200
201 CBufferRefT <ELT> :: m_pRef = m_pBuffer = new ELT[m_nMaxLength];
202 memcpy(m_pBuffer, pcsz, sizeof(ELT) * CBufferRefT <ELT> :: m_nSize);
203 m_pBuffer[CBufferRefT <ELT> :: m_nSize] = 0;
204 }
205
CBufferT()206 template <class ELT> CBufferT <ELT> :: CBufferT() : CBufferRefT <ELT> (0, 0)
207 {
208 m_nMaxLength = 0;
209 m_pBuffer = 0;
210 }
211
212 template <class ELT> inline ELT & CBufferT <ELT> :: operator [] (int nIndex)
213 {
214 return m_pBuffer[nIndex];
215 }
216
217 template <class ELT> inline const ELT & CBufferT <ELT> :: operator [] (int nIndex) const
218 {
219 return m_pBuffer[nIndex];
220 }
221
Append(const ELT * pcsz,int length,int eol)222 template <class ELT> void CBufferT <ELT> :: Append(const ELT * pcsz, int length, int eol)
223 {
224 int nNewLength = m_nMaxLength;
225
226 // Check length
227 if(nNewLength < 8)
228 nNewLength = 8;
229
230 if(CBufferRefT <ELT> :: m_nSize + length + eol > nNewLength)
231 nNewLength *= 2;
232
233 if(CBufferRefT <ELT> :: m_nSize + length + eol > nNewLength)
234 {
235 nNewLength = CBufferRefT <ELT> :: m_nSize + length + eol + 11;
236 nNewLength -= nNewLength % 8;
237 }
238
239 // Realloc
240 if(nNewLength > m_nMaxLength)
241 {
242 ELT * pNewBuffer = new ELT[nNewLength];
243
244 if(m_pBuffer != 0)
245 {
246 memcpy(pNewBuffer, m_pBuffer, sizeof(ELT) * CBufferRefT <ELT> :: m_nSize);
247 delete [] m_pBuffer;
248 }
249
250 CBufferRefT <ELT> :: m_pRef = m_pBuffer = pNewBuffer;
251 m_nMaxLength = nNewLength;
252 }
253
254 // Append
255 memcpy(m_pBuffer + CBufferRefT <ELT> :: m_nSize, pcsz, sizeof(ELT) * length);
256 CBufferRefT <ELT> :: m_nSize += length;
257
258 if(eol > 0) m_pBuffer[CBufferRefT <ELT> :: m_nSize] = 0;
259 }
260
Append(ELT el,int eol)261 template <class ELT> inline void CBufferT <ELT> :: Append(ELT el, int eol)
262 {
263 Append(&el, 1, eol);
264 }
265
Push(ELT el)266 template <class ELT> void CBufferT <ELT> :: Push(ELT el)
267 {
268 // Realloc
269 if(CBufferRefT <ELT> :: m_nSize >= m_nMaxLength)
270 {
271 int nNewLength = m_nMaxLength * 2;
272 if( nNewLength < 8 ) nNewLength = 8;
273
274 ELT * pNewBuffer = new ELT[nNewLength];
275
276 if(m_pBuffer != 0)
277 {
278 memcpy(pNewBuffer, m_pBuffer, sizeof(ELT) * CBufferRefT <ELT> :: m_nSize);
279 delete [] m_pBuffer;
280 }
281
282 CBufferRefT <ELT> :: m_pRef = m_pBuffer = pNewBuffer;
283 m_nMaxLength = nNewLength;
284 }
285
286 // Append
287 m_pBuffer[CBufferRefT <ELT> :: m_nSize++] = el;
288 }
289
Pop(ELT & el)290 template <class ELT> inline int CBufferT <ELT> :: Pop(ELT & el)
291 {
292 if(CBufferRefT <ELT> :: m_nSize > 0)
293 {
294 el = m_pBuffer[--CBufferRefT <ELT> :: m_nSize];
295 return 1;
296 }
297 else
298 {
299 return 0;
300 }
301 }
302
Peek(ELT & el)303 template <class ELT> inline int CBufferT <ELT> :: Peek(ELT & el) const
304 {
305 if(CBufferRefT <ELT> :: m_nSize > 0)
306 {
307 el = m_pBuffer[CBufferRefT <ELT> :: m_nSize - 1];
308 return 1;
309 }
310 else
311 {
312 return 0;
313 }
314 }
315
GetBuffer()316 template <class ELT> const ELT * CBufferT <ELT> :: GetBuffer() const
317 {
318 static const ELT _def[] = {0}; return m_pBuffer ? m_pBuffer : _def;
319 }
320
GetBuffer()321 template <class ELT> ELT * CBufferT <ELT> :: GetBuffer()
322 {
323 static const ELT _def[] = {0}; return m_pBuffer ? m_pBuffer : (ELT *)_def;
324 }
325
Detach()326 template <class ELT> ELT * CBufferT <ELT> :: Detach()
327 {
328 ELT * pBuffer = m_pBuffer;
329
330 CBufferRefT <ELT> :: m_pRef = m_pBuffer = 0;
331 CBufferRefT <ELT> :: m_nSize = m_nMaxLength = 0;
332
333 return pBuffer;
334 }
335
Release()336 template <class ELT> void CBufferT <ELT> :: Release()
337 {
338 ELT * pBuffer = Detach();
339
340 if(pBuffer != 0) delete [] pBuffer;
341 }
342
Prepare(int index,int fill)343 template <class ELT> void CBufferT <ELT> :: Prepare(int index, int fill)
344 {
345 int nNewSize = index + 1;
346
347 // Realloc
348 if(nNewSize > m_nMaxLength)
349 {
350 int nNewLength = m_nMaxLength;
351
352 if( nNewLength < 8 )
353 nNewLength = 8;
354
355 if( nNewSize > nNewLength )
356 nNewLength *= 2;
357
358 if( nNewSize > nNewLength )
359 {
360 nNewLength = nNewSize + 11;
361 nNewLength -= nNewLength % 8;
362 }
363
364 ELT * pNewBuffer = new ELT[nNewLength];
365
366 if(m_pBuffer != 0)
367 {
368 memcpy(pNewBuffer, m_pBuffer, sizeof(ELT) * CBufferRefT <ELT> :: m_nSize);
369 delete [] m_pBuffer;
370 }
371
372 CBufferRefT <ELT> :: m_pRef = m_pBuffer = pNewBuffer;
373 m_nMaxLength = nNewLength;
374 }
375
376 // size
377 if( CBufferRefT <ELT> :: m_nSize < nNewSize )
378 {
379 memset(m_pBuffer + CBufferRefT <ELT> :: m_nSize, fill, sizeof(ELT) * (nNewSize - CBufferRefT <ELT> :: m_nSize));
380 CBufferRefT <ELT> :: m_nSize = nNewSize;
381 }
382 }
383
Restore(int size)384 template <class ELT> inline void CBufferT <ELT> :: Restore(int size)
385 {
386 CBufferRefT <ELT> :: m_nSize = size;
387 }
388
~CBufferT()389 template <class ELT> CBufferT <ELT> :: ~CBufferT()
390 {
391 if(m_pBuffer != 0) delete [] m_pBuffer;
392 }
393
394 //
395 // Context
396 //
397 class CContext
398 {
399 public:
400 CBufferT <int> m_stack;
401 CBufferT <int> m_capturestack, m_captureindex;
402
403 public:
404 int m_nCurrentPos;
405 int m_nBeginPos;
406 int m_nLastBeginPos;
407 int m_nParenZindex;
408
409 void * m_pMatchString;
410 int m_pMatchStringLength;
411 };
412
413 //
414 // Interface
415 //
416 class ElxInterface
417 {
418 public:
419 virtual int Match (CContext * pContext) const = 0;
420 virtual int MatchNext(CContext * pContext) const = 0;
421
422 public:
~ElxInterface()423 virtual ~ElxInterface() {};
424 };
425
426 //
427 // Alternative
428 //
429 template <int x> class CAlternativeElxT : public ElxInterface
430 {
431 public:
432 int Match (CContext * pContext) const;
433 int MatchNext(CContext * pContext) const;
434
435 public:
436 CAlternativeElxT();
437
438 public:
439 CBufferT <ElxInterface *> m_elxlist;
440 };
441
442 typedef CAlternativeElxT <0> CAlternativeElx;
443
444 //
445 // Assert
446 //
447 template <int x> class CAssertElxT : public ElxInterface
448 {
449 public:
450 int Match (CContext * pContext) const;
451 int MatchNext(CContext * pContext) const;
452
453 public:
454 CAssertElxT(ElxInterface * pelx, int byes = 1);
455
456 public:
457 ElxInterface * m_pelx;
458 int m_byes;
459 };
460
461 typedef CAssertElxT <0> CAssertElx;
462
463 //
464 // Back reference elx
465 //
466 template <class CHART> class CBackrefElxT : public ElxInterface
467 {
468 public:
469 int Match (CContext * pContext) const;
470 int MatchNext(CContext * pContext) const;
471
472 public:
473 CBackrefElxT(int nnumber, int brightleft, int bignorecase);
474
475 public:
476 int m_nnumber;
477 int m_brightleft;
478 int m_bignorecase;
479
480 CBufferT <CHART> m_szNamed;
481 };
482
483 //
484 // Implementation
485 //
CBackrefElxT(int nnumber,int brightleft,int bignorecase)486 template <class CHART> CBackrefElxT <CHART> :: CBackrefElxT(int nnumber, int brightleft, int bignorecase)
487 {
488 m_nnumber = nnumber;
489 m_brightleft = brightleft;
490 m_bignorecase = bignorecase;
491 }
492
Match(CContext * pContext)493 template <class CHART> int CBackrefElxT <CHART> :: Match(CContext * pContext) const
494 {
495 // check number, for named
496 if( m_nnumber < 0 || m_nnumber >= pContext->m_captureindex.GetSize() ) return 0;
497
498 int index = pContext->m_captureindex[m_nnumber];
499 if( index < 0 ) return 0;
500
501 // check enclosed
502 int pos1 = pContext->m_capturestack[index + 1];
503 int pos2 = pContext->m_capturestack[index + 2];
504
505 if( pos2 < 0 ) pos2 = pContext->m_nCurrentPos;
506
507 // info
508 int lpos = pos1 < pos2 ? pos1 : pos2;
509 int rpos = pos1 < pos2 ? pos2 : pos1;
510 int slen = rpos - lpos;
511
512 const CHART * pcsz = (const CHART *)pContext->m_pMatchString;
513 int npos = pContext->m_nCurrentPos;
514 int tlen = pContext->m_pMatchStringLength;
515
516 // compare
517 int bsucc;
518 CBufferRefT <CHART> refstr(pcsz + lpos, slen);
519
520 if( m_brightleft )
521 {
522 if(npos < slen)
523 return 0;
524
525 if(m_bignorecase)
526 bsucc = ! refstr.nCompareNoCase(pcsz + (npos - slen));
527 else
528 bsucc = ! refstr.nCompare (pcsz + (npos - slen));
529
530 if( bsucc )
531 {
532 pContext->m_stack.Push(npos);
533 pContext->m_nCurrentPos -= slen;
534 }
535 }
536 else
537 {
538 if(npos + slen > tlen)
539 return 0;
540
541 if(m_bignorecase)
542 bsucc = ! refstr.nCompareNoCase(pcsz + npos);
543 else
544 bsucc = ! refstr.nCompare (pcsz + npos);
545
546 if( bsucc )
547 {
548 pContext->m_stack.Push(npos);
549 pContext->m_nCurrentPos += slen;
550 }
551 }
552
553 return bsucc;
554 }
555
MatchNext(CContext * pContext)556 template <class CHART> int CBackrefElxT <CHART> :: MatchNext(CContext * pContext) const
557 {
558 int npos = 0;
559
560 pContext->m_stack.Pop(npos);
561 pContext->m_nCurrentPos = npos;
562
563 return 0;
564 }
565
566 // RCHART
// RCHART(ch): convert a character constant/expression to the element type
// CHART of the enclosing template. The argument is parenthesised so that
// the cast applies to the whole expression (e.g. RCHART(a + b)), not only
// to its first operand. May be pre-defined by the includer to override.
#ifndef RCHART
#define RCHART(ch) ((CHART)(ch))
#endif
570
571 // BOUNDARY_TYPE
// Kinds of zero-width boundary tested by CBoundaryElxT::Match.
enum BOUNDARY_TYPE
{
    BOUNDARY_FILE_BEGIN = 0,  // begin of whole text
    BOUNDARY_FILE_END   = 1,  // end of whole text
    BOUNDARY_LINE_BEGIN = 2,  // begin of line
    BOUNDARY_LINE_END   = 3,  // end of line
    BOUNDARY_WORD_BEGIN = 4,  // begin of word
    BOUNDARY_WORD_END   = 5,  // end of word
    BOUNDARY_WORD_EDGE  = 6   // either begin or end of word
};
582
583 //
584 // Boundary Elx
585 //
586 template <class CHART> class CBoundaryElxT : public ElxInterface
587 {
588 public:
589 int Match (CContext * pContext) const;
590 int MatchNext(CContext * pContext) const;
591
592 public:
593 CBoundaryElxT(int ntype, int byes = 1);
594
595 protected:
596 static int IsWordChar(CHART ch);
597
598 public:
599 int m_ntype;
600 int m_byes;
601 };
602
603 //
604 // Implementation
605 //
CBoundaryElxT(int ntype,int byes)606 template <class CHART> CBoundaryElxT <CHART> :: CBoundaryElxT(int ntype, int byes)
607 {
608 m_ntype = ntype;
609 m_byes = byes;
610 }
611
Match(CContext * pContext)612 template <class CHART> int CBoundaryElxT <CHART> :: Match(CContext * pContext) const
613 {
614 const CHART * pcsz = (const CHART *)pContext->m_pMatchString;
615 int npos = pContext->m_nCurrentPos;
616 int tlen = pContext->m_pMatchStringLength;
617
618 CHART chL = npos > 0 ? pcsz[npos - 1] : 0;
619 CHART chR = npos < tlen ? pcsz[npos ] : 0;
620
621 int bsucc = 0;
622
623 switch(m_ntype)
624 {
625 case BOUNDARY_FILE_BEGIN:
626 bsucc = (npos <= 0);
627 break;
628
629 case BOUNDARY_FILE_END:
630 bsucc = (npos >= tlen);
631 break;
632
633 case BOUNDARY_LINE_BEGIN:
634 bsucc = (npos <= 0 ) || (chL == RCHART('\n')) || ((chL == RCHART('\r')) && (chR != RCHART('\n')));
635 break;
636
637 case BOUNDARY_LINE_END:
638 bsucc = (npos >= tlen) || (chR == RCHART('\r')) || ((chR == RCHART('\n')) && (chL != RCHART('\r')));
639 break;
640
641 case BOUNDARY_WORD_BEGIN:
642 bsucc = ! IsWordChar(chL) && IsWordChar(chR);
643 break;
644
645 case BOUNDARY_WORD_END:
646 bsucc = IsWordChar(chL) && ! IsWordChar(chR);
647 break;
648
649 case BOUNDARY_WORD_EDGE:
650 bsucc = IsWordChar(chL) ? ! IsWordChar(chR) : IsWordChar(chR);
651 break;
652 }
653
654 return bsucc;
655 }
656
MatchNext(CContext *)657 template <class CHART> int CBoundaryElxT <CHART> :: MatchNext(CContext *) const
658 {
659 return 0;
660 }
661
IsWordChar(CHART ch)662 template <class CHART> inline int CBoundaryElxT <CHART> :: IsWordChar(CHART ch)
663 {
664 return (ch >= RCHART('A') && ch <= RCHART('Z')) || (ch >= RCHART('a') && ch <= RCHART('z')) || (ch >= RCHART('0') && ch <= RCHART('9')) || (ch == RCHART('_'));
665 }
666
667 //
668 // Bracket
669 //
670 template <class CHART> class CBracketElxT : public ElxInterface
671 {
672 public:
673 int Match (CContext * pContext) const;
674 int MatchNext(CContext * pContext) const;
675
676 public:
677 CBracketElxT(int nnumber, int bright);
678
679 public:
680 int m_nnumber;
681 int m_bright;
682
683 CBufferT <CHART> m_szNamed;
684 };
685
CBracketElxT(int nnumber,int bright)686 template <class CHART> CBracketElxT <CHART> :: CBracketElxT(int nnumber, int bright)
687 {
688 m_nnumber = nnumber;
689 m_bright = bright;
690 }
691
Match(CContext * pContext)692 template <class CHART> int CBracketElxT <CHART> :: Match(CContext * pContext) const
693 {
694 // check, for named
695 if(m_nnumber < 0) return 0;
696
697 if( ! m_bright )
698 {
699 pContext->m_captureindex.Prepare(m_nnumber, -1);
700 int index = pContext->m_captureindex[m_nnumber];
701
702 // check
703 if(index > 0 && index < pContext->m_capturestack.GetSize() && pContext->m_capturestack[index+2] < 0)
704 {
705 pContext->m_capturestack[index+3] --;
706 return 1;
707 }
708
709 // save
710 pContext->m_captureindex[m_nnumber] = pContext->m_capturestack.GetSize();
711
712 pContext->m_capturestack.Push(m_nnumber);
713 pContext->m_capturestack.Push(pContext->m_nCurrentPos);
714 pContext->m_capturestack.Push(-1);
715 pContext->m_capturestack.Push( 0); // z-index
716 }
717 else
718 {
719 // check
720 int index = pContext->m_captureindex[m_nnumber];
721
722 if(pContext->m_capturestack[index + 3] < 0)
723 {
724 pContext->m_capturestack[index + 3] ++;
725 return 1;
726 }
727
728 // save
729 pContext->m_capturestack[index + 2] = pContext->m_nCurrentPos;
730 pContext->m_capturestack[index + 3] = pContext->m_nParenZindex ++;
731 }
732
733 return 1;
734 }
735
MatchNext(CContext * pContext)736 template <class CHART> int CBracketElxT <CHART> :: MatchNext(CContext * pContext) const
737 {
738 int index = pContext->m_captureindex[m_nnumber];
739
740 if( ! m_bright )
741 {
742 if(pContext->m_capturestack[index + 3] < 0)
743 {
744 pContext->m_capturestack[index + 3] ++;
745 return 0;
746 }
747
748 pContext->m_capturestack.Restore(pContext->m_capturestack.GetSize() - 4);
749
750 // to find
751 index = pContext->m_capturestack.GetSize() - 4;
752 while(index >= 0 && pContext->m_capturestack[index] != m_nnumber) index -= 4;
753
754 // new index
755 pContext->m_captureindex[m_nnumber] = index;
756 }
757 else
758 {
759 if(pContext->m_capturestack[index + 3] < 0)
760 {
761 pContext->m_capturestack[index + 3] --;
762 return 0;
763 }
764
765 pContext->m_capturestack[index + 2] = -1;
766 pContext->m_capturestack[index + 3] = 0;
767 }
768
769 return 0;
770 }
771
772 //
// Delegate
774 //
775 template <class CHART> class CDelegateElxT : public ElxInterface
776 {
777 public:
778 int Match (CContext * pContext) const;
779 int MatchNext(CContext * pContext) const;
780
781 public:
782 CDelegateElxT(int ndata = 0);
783
784 public:
785 ElxInterface * m_pelx;
786 int m_ndata; // +0 : recursive to
787 // -3 : named recursive
788
789 CBufferT <CHART> m_szNamed;
790 };
791
CDelegateElxT(int ndata)792 template <class CHART> CDelegateElxT <CHART> :: CDelegateElxT(int ndata)
793 {
794 m_pelx = 0;
795 m_ndata = ndata;
796 }
797
Match(CContext * pContext)798 template <class CHART> int CDelegateElxT <CHART> :: Match(CContext * pContext) const
799 {
800 if(m_pelx != 0)
801 return m_pelx->Match(pContext);
802 else
803 return 1;
804 }
805
MatchNext(CContext * pContext)806 template <class CHART> int CDelegateElxT <CHART> :: MatchNext(CContext * pContext) const
807 {
808 if(m_pelx != 0)
809 return m_pelx->MatchNext(pContext);
810 else
811 return 0;
812 }
813
814 //
815 // Empty
816 //
817 template <int x> class CEmptyElxT : public ElxInterface
818 {
819 public:
820 int Match (CContext * pContext) const;
821 int MatchNext(CContext * pContext) const;
822
823 public:
824 CEmptyElxT();
825 };
826
827 typedef CEmptyElxT <0> CEmptyElx;
828
829 //
830 // Global
831 //
832 template <int x> class CGlobalElxT : public ElxInterface
833 {
834 public:
835 int Match (CContext * pContext) const;
836 int MatchNext(CContext * pContext) const;
837
838 public:
839 CGlobalElxT();
840 };
841
842 typedef CGlobalElxT <0> CGlobalElx;
843
844 //
845 // Repeat
846 //
847 template <int x> class CRepeatElxT : public ElxInterface
848 {
849 public:
850 int Match (CContext * pContext) const;
851 int MatchNext(CContext * pContext) const;
852
853 public:
854 CRepeatElxT(ElxInterface * pelx, int ntimes);
855
856 protected:
857 int MatchFixed (CContext * pContext) const;
858 int MatchNextFixed(CContext * pContext) const;
859
860 public:
861 ElxInterface * m_pelx;
862 int m_nfixed;
863 };
864
865 typedef CRepeatElxT <0> CRepeatElx;
866
867 //
868 // Greedy
869 //
870 template <int x> class CGreedyElxT : public CRepeatElxT <x>
871 {
872 public:
873 int Match (CContext * pContext) const;
874 int MatchNext(CContext * pContext) const;
875
876 public:
877 CGreedyElxT(ElxInterface * pelx, int nmin = 0, int nmax = INT_MAX);
878
879 protected:
880 int MatchVart (CContext * pContext) const;
881 int MatchNextVart(CContext * pContext) const;
882
883 public:
884 int m_nvart;
885 };
886
887 typedef CGreedyElxT <0> CGreedyElx;
888
889 //
890 // Independent
891 //
892 template <int x> class CIndependentElxT : public ElxInterface
893 {
894 public:
895 int Match (CContext * pContext) const;
896 int MatchNext(CContext * pContext) const;
897
898 public:
899 CIndependentElxT(ElxInterface * pelx);
900
901 public:
902 ElxInterface * m_pelx;
903 };
904
905 typedef CIndependentElxT <0> CIndependentElx;
906
907 //
908 // List
909 //
910 template <int x> class CListElxT : public ElxInterface
911 {
912 public:
913 int Match (CContext * pContext) const;
914 int MatchNext(CContext * pContext) const;
915
916 public:
917 CListElxT(int brightleft);
918
919 public:
920 CBufferT <ElxInterface *> m_elxlist;
921 int m_brightleft;
922 };
923
924 typedef CListElxT <0> CListElx;
925
926 //
927 // Posix Elx
928 //
929 template <class CHART> class CPosixElxT : public ElxInterface
930 {
931 public:
932 int Match (CContext * pContext) const;
933 int MatchNext(CContext * pContext) const;
934
935 public:
936 CPosixElxT(const char * posix, int brightleft);
937
938 protected:
939 static int misblank(int c);
940
941 public:
942 int (*m_posixfun)(int);
943 int m_brightleft;
944 int m_byes;
945 };
946
947 //
948 // Implementation
949 //
CPosixElxT(const char * posix,int brightleft)950 template <class CHART> CPosixElxT <CHART> :: CPosixElxT(const char * posix, int brightleft)
951 {
952 m_brightleft = brightleft;
953
954 if(posix[1] == '^')
955 {
956 m_byes = 0;
957 posix += 2;
958 }
959 else
960 {
961 m_byes = 1;
962 posix += 1;
963 }
964
965 if (!strncmp(posix, "alnum:", 6)) m_posixfun = isalnum ;
966 else if(!strncmp(posix, "alpha:", 6)) m_posixfun = isalpha ;
967 else if(!strncmp(posix, "ascii:", 6)) m_posixfun = isascii ;
968 else if(!strncmp(posix, "cntrl:", 6)) m_posixfun = iscntrl ;
969 else if(!strncmp(posix, "digit:", 6)) m_posixfun = isdigit ;
970 else if(!strncmp(posix, "graph:", 6)) m_posixfun = isgraph ;
971 else if(!strncmp(posix, "lower:", 6)) m_posixfun = islower ;
972 else if(!strncmp(posix, "print:", 6)) m_posixfun = isprint ;
973 else if(!strncmp(posix, "punct:", 6)) m_posixfun = ispunct ;
974 else if(!strncmp(posix, "space:", 6)) m_posixfun = isspace ;
975 else if(!strncmp(posix, "upper:", 6)) m_posixfun = isupper ;
976 else if(!strncmp(posix, "xdigit:",7)) m_posixfun = isxdigit;
977 else if(!strncmp(posix, "blank:", 6)) m_posixfun = misblank;
978 else m_posixfun = 0 ;
979 }
980
misblank(int c)981 template <class CHART> int CPosixElxT <CHART> :: misblank(int c)
982 {
983 return c == 0x20 || c == '\t';
984 }
985
Match(CContext * pContext)986 template <class CHART> int CPosixElxT <CHART> :: Match(CContext * pContext) const
987 {
988 if(m_posixfun == 0) return 0;
989
990 int tlen = pContext->m_pMatchStringLength;
991 int npos = pContext->m_nCurrentPos;
992
993 // check
994 int at = m_brightleft ? npos - 1 : npos;
995 if( at < 0 || at >= tlen )
996 return 0;
997
998 CHART ch = ((const CHART *)pContext->m_pMatchString)[at];
999
1000 int bsucc = (*m_posixfun)(ch);
1001
1002 if( ! m_byes )
1003 bsucc = ! bsucc;
1004
1005 if( bsucc )
1006 pContext->m_nCurrentPos += m_brightleft ? -1 : 1;
1007
1008 return bsucc;
1009 }
1010
MatchNext(CContext * pContext)1011 template <class CHART> int CPosixElxT <CHART> :: MatchNext(CContext * pContext) const
1012 {
1013 pContext->m_nCurrentPos -= m_brightleft ? -1 : 1;
1014 return 0;
1015 }
1016
1017 //
1018 // Possessive
1019 //
1020 template <int x> class CPossessiveElxT : public CGreedyElxT <x>
1021 {
1022 public:
1023 int Match (CContext * pContext) const;
1024 int MatchNext(CContext * pContext) const;
1025
1026 public:
1027 CPossessiveElxT(ElxInterface * pelx, int nmin = 0, int nmax = INT_MAX);
1028 };
1029
1030 typedef CPossessiveElxT <0> CPossessiveElx;
1031
1032 //
1033 // Range Elx
1034 //
1035 template <class CHART> class CRangeElxT : public ElxInterface
1036 {
1037 public:
1038 int Match (CContext * pContext) const;
1039 int MatchNext(CContext * pContext) const;
1040
1041 public:
1042 CRangeElxT(int brightleft, int byes);
1043
1044 public:
1045 CBufferT <CHART> m_ranges;
1046 CBufferT <CHART> m_chars;
1047 CBufferT <ElxInterface *> m_embeds;
1048
1049 public:
1050 int m_brightleft;
1051 int m_byes;
1052 };
1053
1054 //
1055 // Implementation
1056 //
CRangeElxT(int brightleft,int byes)1057 template <class CHART> CRangeElxT <CHART> :: CRangeElxT(int brightleft, int byes)
1058 {
1059 m_brightleft = brightleft;
1060 m_byes = byes;
1061 }
1062
Match(CContext * pContext)1063 template <class CHART> int CRangeElxT <CHART> :: Match(CContext * pContext) const
1064 {
1065 int tlen = pContext->m_pMatchStringLength;
1066 int npos = pContext->m_nCurrentPos;
1067
1068 // check
1069 int at = m_brightleft ? npos - 1 : npos;
1070 if( at < 0 || at >= tlen )
1071 return 0;
1072
1073 CHART ch = ((const CHART *)pContext->m_pMatchString)[at];
1074 int bsucc = 0, i;
1075
1076 // compare
1077 for(i=0; !bsucc && i<m_ranges.GetSize(); i+=2)
1078 {
1079 if(m_ranges[i] <= ch && ch <= m_ranges[i+1]) bsucc = 1;
1080 }
1081
1082 for(i=0; !bsucc && i<m_chars.GetSize(); i++)
1083 {
1084 if(m_chars[i] == ch) bsucc = 1;
1085 }
1086
1087 for(i=0; !bsucc && i<m_embeds.GetSize(); i++)
1088 {
1089 if(m_embeds[i]->Match(pContext))
1090 {
1091 pContext->m_nCurrentPos = npos;
1092 bsucc = 1;
1093 }
1094 }
1095
1096 if( ! m_byes )
1097 bsucc = ! bsucc;
1098
1099 if( bsucc )
1100 pContext->m_nCurrentPos += m_brightleft ? -1 : 1;
1101
1102 return bsucc;
1103 }
1104
MatchNext(CContext * pContext)1105 template <class CHART> int CRangeElxT <CHART> :: MatchNext(CContext * pContext) const
1106 {
1107 pContext->m_nCurrentPos -= m_brightleft ? -1 : 1;
1108 return 0;
1109 }
1110
1111 //
1112 // Reluctant
1113 //
1114 template <int x> class CReluctantElxT : public CRepeatElxT <x>
1115 {
1116 public:
1117 int Match (CContext * pContext) const;
1118 int MatchNext(CContext * pContext) const;
1119
1120 public:
1121 CReluctantElxT(ElxInterface * pelx, int nmin = 0, int nmax = INT_MAX);
1122
1123 protected:
1124 int MatchVart (CContext * pContext) const;
1125 int MatchNextVart(CContext * pContext) const;
1126
1127 public:
1128 int m_nvart;
1129 };
1130
1131 typedef CReluctantElxT <0> CReluctantElx;
1132
1133 //
1134 // String Elx
1135 //
1136 template <class CHART> class CStringElxT : public ElxInterface
1137 {
1138 public:
1139 int Match (CContext * pContext) const;
1140 int MatchNext(CContext * pContext) const;
1141
1142 public:
1143 CStringElxT(const CHART * fixed, int nlength, int brightleft, int bignorecase);
1144
1145 public:
1146 CBufferT <CHART> m_szPattern;
1147 int m_brightleft;
1148 int m_bignorecase;
1149 };
1150
1151 //
1152 // Implementation
1153 //
CStringElxT(const CHART * fixed,int nlength,int brightleft,int bignorecase)1154 template <class CHART> CStringElxT <CHART> :: CStringElxT(const CHART * fixed, int nlength, int brightleft, int bignorecase) : m_szPattern(fixed, nlength)
1155 {
1156 m_brightleft = brightleft;
1157 m_bignorecase = bignorecase;
1158 }
1159
Match(CContext * pContext)1160 template <class CHART> int CStringElxT <CHART> :: Match(CContext * pContext) const
1161 {
1162 const CHART * pcsz = (const CHART *)pContext->m_pMatchString;
1163 int npos = pContext->m_nCurrentPos;
1164 int tlen = pContext->m_pMatchStringLength;
1165 int slen = m_szPattern.GetSize();
1166
1167 int bsucc;
1168
1169 if(m_brightleft)
1170 {
1171 if(npos < slen)
1172 return 0;
1173
1174 if(m_bignorecase)
1175 bsucc = ! m_szPattern.nCompareNoCase(pcsz + (npos - slen));
1176 else
1177 bsucc = ! m_szPattern.nCompare (pcsz + (npos - slen));
1178
1179 if( bsucc )
1180 pContext->m_nCurrentPos -= slen;
1181 }
1182 else
1183 {
1184 if(npos + slen > tlen)
1185 return 0;
1186
1187 if(m_bignorecase)
1188 bsucc = ! m_szPattern.nCompareNoCase(pcsz + npos);
1189 else
1190 bsucc = ! m_szPattern.nCompare (pcsz + npos);
1191
1192 if( bsucc )
1193 pContext->m_nCurrentPos += slen;
1194 }
1195
1196 return bsucc;
1197 }
1198
MatchNext(CContext * pContext)1199 template <class CHART> int CStringElxT <CHART> :: MatchNext(CContext * pContext) const
1200 {
1201 int slen = m_szPattern.GetSize();
1202
1203 if(m_brightleft)
1204 pContext->m_nCurrentPos += slen;
1205 else
1206 pContext->m_nCurrentPos -= slen;
1207
1208 return 0;
1209 }
1210
1211 //
1212 // CConditionElx
1213 //
1214 template <class CHART> class CConditionElxT : public ElxInterface
1215 {
1216 public:
1217 int Match (CContext * pContext) const;
1218 int MatchNext(CContext * pContext) const;
1219
1220 public:
1221 CConditionElxT();
1222
1223 public:
1224 // backref condition
1225 int m_nnumber;
1226 CBufferT <CHART> m_szNamed;
1227
1228 // elx condition
1229 ElxInterface * m_pelxask;
1230
1231 // selection
1232 ElxInterface * m_pelxyes, * m_pelxno;
1233 };
1234
CConditionElxT()1235 template <class CHART> CConditionElxT <CHART> :: CConditionElxT()
1236 {
1237 m_nnumber = -1;
1238 }
1239
Match(CContext * pContext)1240 template <class CHART> int CConditionElxT <CHART> :: Match(CContext * pContext) const
1241 {
1242 // status
1243 int nbegin = pContext->m_nCurrentPos;
1244 int nsize = pContext->m_stack.GetSize();
1245 int ncsize = pContext->m_capturestack.GetSize();
1246
1247 // condition result
1248 int condition_yes = 0;
1249
1250 // backref type
1251 if( m_nnumber >= 0 )
1252 {
1253 do
1254 {
1255 if(m_nnumber >= pContext->m_captureindex.GetSize()) break;
1256
1257 int index = pContext->m_captureindex[m_nnumber];
1258 if( index < 0) break;
1259
1260 // else valid
1261 condition_yes = 1;
1262 }
1263 while(0);
1264 }
1265 else
1266 {
1267 if( m_pelxask == 0 )
1268 condition_yes = 1;
1269 else
1270 condition_yes = m_pelxask->Match(pContext);
1271
1272 pContext->m_stack.Restore(nsize);
1273 pContext->m_nCurrentPos = nbegin;
1274 }
1275
1276 // elx result
1277 int bsucc;
1278 if( condition_yes )
1279 bsucc = m_pelxyes == 0 ? 1 : m_pelxyes->Match(pContext);
1280 else
1281 bsucc = m_pelxno == 0 ? 1 : m_pelxno ->Match(pContext);
1282
1283 if( bsucc )
1284 {
1285 pContext->m_stack.Push(ncsize);
1286 pContext->m_stack.Push(condition_yes);
1287 }
1288 else
1289 {
1290 pContext->m_capturestack.Restore(ncsize);
1291 }
1292
1293 return bsucc;
1294 }
1295
MatchNext(CContext * pContext)1296 template <class CHART> int CConditionElxT <CHART> :: MatchNext(CContext * pContext) const
1297 {
1298 // pop
1299 int ncsize, condition_yes;
1300
1301 pContext->m_stack.Pop(condition_yes);
1302 pContext->m_stack.Pop(ncsize);
1303
1304 // elx result
1305 int bsucc;
1306 if( condition_yes )
1307 bsucc = m_pelxyes == 0 ? 0 : m_pelxyes->MatchNext(pContext);
1308 else
1309 bsucc = m_pelxno == 0 ? 0 : m_pelxno ->MatchNext(pContext);
1310
1311 if( bsucc )
1312 {
1313 pContext->m_stack.Push(ncsize);
1314 pContext->m_stack.Push(condition_yes);
1315 }
1316 else
1317 {
1318 pContext->m_capturestack.Restore(ncsize);
1319 }
1320
1321 return bsucc;
1322 }
1323
1324 //
1325 // MatchResult
1326 //
1327 template <int x> class MatchResultT
1328 {
1329 public:
1330 int IsMatched() const;
1331
1332 public:
1333 int GetStart() const;
1334 int GetEnd () const;
1335
1336 public:
1337 int MaxGroupNumber() const;
1338 int GetGroupStart(int nGroupNumber) const;
1339 int GetGroupEnd (int nGroupNumber) const;
1340
1341 public:
1342 MatchResultT(CContext * pContext, int nMaxNumber = -1);
1343 MatchResultT <x> & operator = (const MatchResultT <x> &);
1344 inline operator int() const { return IsMatched(); }
1345
1346 public:
1347 CBufferT <int> m_result;
1348 };
1349
1350 typedef MatchResultT <0> MatchResult;
1351
//
// Ids of the shared ("stock") elements created on demand by
// CBuilderT::GetStockElx(). The values index CBuilderT::m_pStockElxs, so
// they must stay contiguous and STOCKELX_COUNT must stay last.
//
enum STOCKELX_ID_DEFINES
{
    STOCKELX_EMPTY = 0,             // empty element

    // left-to-right matching
    STOCKELX_DOT_ALL,               // '.' with SINGLELINE
    STOCKELX_DOT_NOT_ALL,           // '.' without SINGLELINE
    STOCKELX_WORD,                  // \w
    STOCKELX_WORD_NOT,              // \W
    STOCKELX_SPACE,                 // \s
    STOCKELX_SPACE_NOT,             // \S
    STOCKELX_DIGITAL,               // \d
    STOCKELX_DIGITAL_NOT,           // \D

    // right-to-left matching
    STOCKELX_DOT_ALL_RIGHTLEFT,
    STOCKELX_DOT_NOT_ALL_RIGHTLEFT,
    STOCKELX_WORD_RIGHTLEFT,
    STOCKELX_WORD_RIGHTLEFT_NOT,
    STOCKELX_SPACE_RIGHTLEFT,
    STOCKELX_SPACE_RIGHTLEFT_NOT,
    STOCKELX_DIGITAL_RIGHTLEFT,
    STOCKELX_DIGITAL_RIGHTLEFT_NOT,

    // number of ids above
    STOCKELX_COUNT
};
1389
//
// REGEX_FLAGS: option bits, combined with '|'.
// Guarded so that another header may supply the same definition.
//
#ifndef _REGEX_FLAGS_DEFINED
#define _REGEX_FLAGS_DEFINED
enum REGEX_FLAGS
{
    NO_FLAG     = 0,
    SINGLELINE  = 1 << 0,   // 0x01
    MULTILINE   = 1 << 1,   // 0x02
    GLOBAL      = 1 << 2,   // 0x04
    IGNORECASE  = 1 << 3,   // 0x08
    RIGHTTOLEFT = 1 << 4,   // 0x10
    EXTENDED    = 1 << 5    // 0x20
};
#endif
1404
1405 //
1406 // Builder T
1407 //
1408 template <class CHART> class CBuilderT
1409 {
1410 public:
1411 typedef CDelegateElxT <CHART> CDelegateElx;
1412 typedef CBracketElxT <CHART> CBracketElx;
1413 typedef CBackrefElxT <CHART> CBackrefElx;
1414 typedef CConditionElxT <CHART> CConditionElx;
1415
1416 // Methods
1417 public:
1418 ElxInterface * Build(const CBufferRefT <CHART> & pattern, int flags);
1419 int GetNamedNumber(const CBufferRefT <CHART> & named) const;
1420 void Clear();
1421
1422 public:
1423 CBuilderT();
1424 ~CBuilderT();
1425
1426 // Public Attributes
1427 public:
1428 ElxInterface * m_pTopElx;
1429 int m_nFlags;
1430 int m_nMaxNumber;
1431 int m_nNextNamed;
1432 int m_nGroupCount;
1433
1434 CBufferT <ElxInterface *> m_objlist;
1435 CBufferT <ElxInterface *> m_grouplist;
1436 CBufferT <CDelegateElx *> m_recursivelist;
1437 CBufferT <CListElx *> m_namedlist;
1438 CBufferT <CBackrefElx *> m_namedbackreflist;
1439 CBufferT <CConditionElx *> m_namedconditionlist;
1440
1441 // CHART_INFO
1442 protected:
1443 struct CHART_INFO
1444 {
1445 public:
1446 CHART ch;
1447 int type;
1448 int pos;
1449 int len;
1450
1451 public:
1452 CHART_INFO(CHART c, int t, int p = 0, int l = 0) { ch = c; type = t; pos = p; len = l; }
1453 inline int operator == (const CHART_INFO & ci) { return ch == ci.ch && type == ci.type; }
1454 inline int operator != (const CHART_INFO & ci) { return ! operator == (ci); }
1455 };
1456
1457 protected:
1458 static unsigned int Hex2Int(const CHART * pcsz, int length, int & used);
1459 static int ReadDec(char * & str, unsigned int & dec);
1460 void MoveNext();
1461 int GetNext2();
1462
1463 ElxInterface * BuildAlternative(int vaflags);
1464 ElxInterface * BuildList (int & flags);
1465 ElxInterface * BuildRepeat (int & flags);
1466 ElxInterface * BuildSimple (int & flags);
1467 ElxInterface * BuildCharset (int & flags);
1468 ElxInterface * BuildRecursive (int & flags);
1469 ElxInterface * BuildBoundary (int & flags);
1470 ElxInterface * BuildBackref (int & flags);
1471
1472 ElxInterface * GetStockElx (int nStockId);
1473 ElxInterface * Keep(ElxInterface * pElx);
1474
1475 // Private Attributes
1476 protected:
1477 CBufferRefT <CHART> m_pattern;
1478 CHART_INFO prev, curr, next, nex2;
1479 int m_nNextPos;
1480 int m_nCharsetDepth;
1481 int m_bQuoted;
1482 int (*m_quote_fun)(int);
1483
1484 ElxInterface * m_pStockElxs[STOCKELX_COUNT];
1485 };
1486
1487 //
1488 // Implementation
1489 //
CBuilderT()1490 template <class CHART> CBuilderT <CHART> :: CBuilderT() : m_pattern(0, 0), prev(0, 0), curr(0, 0), next(0, 0), nex2(0, 0)
1491 {
1492 Clear();
1493 }
1494
~CBuilderT()1495 template <class CHART> CBuilderT <CHART> :: ~CBuilderT()
1496 {
1497 Clear();
1498 }
1499
GetNamedNumber(const CBufferRefT<CHART> & named)1500 template <class CHART> int CBuilderT <CHART> :: GetNamedNumber(const CBufferRefT <CHART> & named) const
1501 {
1502 for(int i=0; i<m_namedlist.GetSize(); i++)
1503 {
1504 if( ! ((CBracketElx *)m_namedlist[i]->m_elxlist[0])->m_szNamed.CompareNoCase(named) )
1505 return ((CBracketElx *)m_namedlist[i]->m_elxlist[0])->m_nnumber;
1506 }
1507
1508 return -3;
1509 }
1510
Build(const CBufferRefT<CHART> & pattern,int flags)1511 template <class CHART> ElxInterface * CBuilderT <CHART> :: Build(const CBufferRefT <CHART> & pattern, int flags)
1512 {
1513 // init
1514 m_pattern = pattern;
1515 m_nNextPos = 0;
1516 m_nCharsetDepth = 0;
1517 m_nMaxNumber = 0;
1518 m_nNextNamed = 0;
1519 m_nFlags = flags;
1520 m_bQuoted = 0;
1521 m_quote_fun = 0;
1522
1523 m_grouplist .Restore(0);
1524 m_recursivelist .Restore(0);
1525 m_namedlist .Restore(0);
1526 m_namedbackreflist .Restore(0);
1527 m_namedconditionlist.Restore(0);
1528
1529 int i;
1530 for(i=0; i<3; i++) MoveNext();
1531
1532 // build
1533 m_pTopElx = BuildAlternative(flags);
1534
1535 // group 0
1536 m_grouplist.Prepare(0);
1537 m_grouplist[0] = m_pTopElx;
1538
1539 // append named to unnamed
1540 m_nGroupCount = m_grouplist.GetSize();
1541
1542 m_grouplist.Prepare(m_nMaxNumber + m_namedlist.GetSize());
1543
1544 for(i=0; i<m_namedlist.GetSize(); i++)
1545 {
1546 CBracketElx * pleft = (CBracketElx *)m_namedlist[i]->m_elxlist[0];
1547 CBracketElx * pright = (CBracketElx *)m_namedlist[i]->m_elxlist[2];
1548
1549 // append
1550 m_grouplist[m_nGroupCount ++] = m_namedlist[i];
1551
1552 if( pleft->m_nnumber > 0 )
1553 continue;
1554
1555 // same name
1556 int find_same_name = GetNamedNumber(pleft->m_szNamed);
1557 if( find_same_name >= 0 )
1558 {
1559 pleft ->m_nnumber = find_same_name;
1560 pright->m_nnumber = find_same_name;
1561 }
1562 else
1563 {
1564 m_nMaxNumber ++;
1565
1566 pleft ->m_nnumber = m_nMaxNumber;
1567 pright->m_nnumber = m_nMaxNumber;
1568 }
1569 }
1570
1571 for(i=1; i<m_nGroupCount; i++)
1572 {
1573 CBracketElx * pleft = (CBracketElx *)((CListElx*)m_grouplist[i])->m_elxlist[0];
1574
1575 if( pleft->m_nnumber > m_nMaxNumber )
1576 m_nMaxNumber = pleft->m_nnumber;
1577 }
1578
1579 // connect recursive
1580 for(i=0; i<m_recursivelist.GetSize(); i++)
1581 {
1582 if( m_recursivelist[i]->m_ndata == -3 )
1583 m_recursivelist[i]->m_ndata = GetNamedNumber(m_recursivelist[i]->m_szNamed);
1584
1585 if( m_recursivelist[i]->m_ndata >= 0 && m_recursivelist[i]->m_ndata < m_grouplist.GetSize() )
1586 m_recursivelist[i]->m_pelx = m_grouplist[m_recursivelist[i]->m_ndata];
1587 }
1588
1589 // named backref
1590 for(i=0; i<m_namedbackreflist.GetSize(); i++)
1591 {
1592 m_namedbackreflist[i]->m_nnumber = GetNamedNumber(m_namedbackreflist[i]->m_szNamed);
1593 }
1594
1595 // named condition
1596 for(i=0; i<m_namedconditionlist.GetSize(); i++)
1597 {
1598 int nn = GetNamedNumber(m_namedconditionlist[i]->m_szNamed);
1599 if( nn >= 0 )
1600 {
1601 m_namedconditionlist[i]->m_nnumber = nn;
1602 m_namedconditionlist[i]->m_pelxask = 0;
1603 }
1604 }
1605
1606 return m_pTopElx;
1607 }
1608
Clear()1609 template <class CHART> void CBuilderT <CHART> :: Clear()
1610 {
1611 for(int i=0; i<m_objlist.GetSize(); i++)
1612 {
1613 delete m_objlist[i];
1614 }
1615
1616 m_objlist.Restore(0);
1617 m_pTopElx = 0;
1618
1619 memset(m_pStockElxs, 0, sizeof(m_pStockElxs));
1620 }
1621
1622 //
1623 // hex to int
1624 //
Hex2Int(const CHART * pcsz,int length,int & used)1625 template <class CHART> unsigned int CBuilderT <CHART> :: Hex2Int(const CHART * pcsz, int length, int & used)
1626 {
1627 unsigned int result = 0;
1628 int & i = used;
1629
1630 for(i=0; i<length; i++)
1631 {
1632 if(pcsz[i] >= RCHART('0') && pcsz[i] <= RCHART('9'))
1633 result = (result << 4) + (pcsz[i] - RCHART('0'));
1634 else if(pcsz[i] >= RCHART('A') && pcsz[i] <= RCHART('F'))
1635 result = (result << 4) + (0x0A + (pcsz[i] - RCHART('A')));
1636 else if(pcsz[i] >= RCHART('a') && pcsz[i] <= RCHART('f'))
1637 result = (result << 4) + (0x0A + (pcsz[i] - RCHART('a')));
1638 else
1639 break;
1640 }
1641
1642 return result;
1643 }
1644
Keep(ElxInterface * pelx)1645 template <class CHART> inline ElxInterface * CBuilderT <CHART> :: Keep(ElxInterface * pelx)
1646 {
1647 m_objlist.Push(pelx);
1648 return pelx;
1649 }
1650
MoveNext()1651 template <class CHART> void CBuilderT <CHART> :: MoveNext()
1652 {
1653 // forwards
1654 prev = curr;
1655 curr = next;
1656 next = nex2;
1657
1658 // get nex2
1659 while( ! GetNext2() ) {};
1660 }
1661
GetNext2()1662 template <class CHART> int CBuilderT <CHART> :: GetNext2()
1663 {
1664 // check length
1665 if(m_nNextPos >= m_pattern.GetSize())
1666 {
1667 nex2 = CHART_INFO(0, 1, m_nNextPos, 0);
1668 return 1;
1669 }
1670
1671 int delta = 1;
1672 CHART ch = m_pattern[m_nNextPos];
1673
1674 // if quoted
1675 if(m_bQuoted)
1676 {
1677 if(ch == RCHART('\\'))
1678 {
1679 if(m_pattern[m_nNextPos + 1] == RCHART('E'))
1680 {
1681 m_quote_fun = 0;
1682 m_bQuoted = 0;
1683 m_nNextPos += 2;
1684 return 0;
1685 }
1686 }
1687
1688 if(m_quote_fun != 0)
1689 nex2 = CHART_INFO((CHART)(*m_quote_fun)((int)ch), 0, m_nNextPos, delta);
1690 else
1691 nex2 = CHART_INFO(ch, 0, m_nNextPos, delta);
1692
1693 m_nNextPos += delta;
1694
1695 return 1;
1696 }
1697
1698 // common
1699 switch(ch)
1700 {
1701 case RCHART('\\'):
1702 {
1703 CHART ch1 = m_pattern[m_nNextPos+1];
1704
1705 // backref
1706 if(ch1 >= RCHART('0') && ch1 <= RCHART('9'))
1707 {
1708 nex2 = CHART_INFO(ch, 1, m_nNextPos, delta);
1709 break;
1710 }
1711
1712 // escape
1713 delta = 2;
1714
1715 switch(ch1)
1716 {
1717 case RCHART('A'):
1718 case RCHART('Z'):
1719 case RCHART('w'):
1720 case RCHART('W'):
1721 case RCHART('s'):
1722 case RCHART('S'):
1723 case RCHART('B'):
1724 case RCHART('d'):
1725 case RCHART('D'):
1726 case RCHART('k'):
1727 case RCHART('g'):
1728 nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta);
1729 break;
1730
1731 case RCHART('b'):
1732 if(m_nCharsetDepth > 0)
1733 nex2 = CHART_INFO('\b', 0, m_nNextPos, delta);
1734 else
1735 nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta);
1736 break;
1737
1738 /*
1739 case RCHART('<'):
1740 case RCHART('>'):
1741 if(m_nCharsetDepth > 0)
1742 nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta);
1743 else
1744 nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta);
1745 break;
1746 */
1747
1748 case RCHART('x'):
1749 if(m_pattern[m_nNextPos+2] != '{')
1750 {
1751 int red = 0;
1752 unsigned int ch2 = Hex2Int(m_pattern.GetBuffer() + m_nNextPos + 2, 2, red);
1753
1754 delta += red;
1755
1756 if(red > 0)
1757 nex2 = CHART_INFO(RCHART(ch2), 0, m_nNextPos, delta);
1758 else
1759 nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta);
1760
1761 break;
1762 }
1763
1764 case RCHART('u'):
1765 if(m_pattern[m_nNextPos+2] != '{')
1766 {
1767 int red = 0;
1768 unsigned int ch2 = Hex2Int(m_pattern.GetBuffer() + m_nNextPos + 2, 4, red);
1769
1770 delta += red;
1771
1772 if(red > 0)
1773 nex2 = CHART_INFO(RCHART(ch2), 0, m_nNextPos, delta);
1774 else
1775 nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta);
1776 }
1777 else
1778 {
1779 int red = 0;
1780 unsigned int ch2 = Hex2Int(m_pattern.GetBuffer() + m_nNextPos + 3, sizeof(int) * 2, red);
1781
1782 delta += red;
1783
1784 while(m_nNextPos + delta < m_pattern.GetSize() && m_pattern.At(m_nNextPos + delta) != RCHART('}'))
1785 delta ++;
1786
1787 delta ++; // skip '}'
1788
1789 nex2 = CHART_INFO(RCHART(ch2), 0, m_nNextPos, delta);
1790 }
1791 break;
1792
1793 case RCHART('a'): nex2 = CHART_INFO(RCHART('\a'), 0, m_nNextPos, delta); break;
1794 case RCHART('f'): nex2 = CHART_INFO(RCHART('\f'), 0, m_nNextPos, delta); break;
1795 case RCHART('n'): nex2 = CHART_INFO(RCHART('\n'), 0, m_nNextPos, delta); break;
1796 case RCHART('r'): nex2 = CHART_INFO(RCHART('\r'), 0, m_nNextPos, delta); break;
1797 case RCHART('t'): nex2 = CHART_INFO(RCHART('\t'), 0, m_nNextPos, delta); break;
1798 case RCHART('v'): nex2 = CHART_INFO(RCHART('\v'), 0, m_nNextPos, delta); break;
1799 case RCHART('e'): nex2 = CHART_INFO(RCHART( 27 ), 0, m_nNextPos, delta); break;
1800
1801 case RCHART('G'): // skip '\G'
1802 if(m_nCharsetDepth > 0)
1803 {
1804 m_nNextPos += 2;
1805 return 0;
1806 }
1807 else
1808 {
1809 nex2 = CHART_INFO(ch1, 1, m_nNextPos, delta);
1810 break;
1811 }
1812
1813 case RCHART('L'):
1814 if( ! m_quote_fun ) m_quote_fun = ::tolower;
1815
1816 case RCHART('U'):
1817 if( ! m_quote_fun ) m_quote_fun = ::toupper;
1818
1819 case RCHART('Q'):
1820 {
1821 m_bQuoted = 1;
1822 m_nNextPos += 2;
1823 return 0;
1824 }
1825
1826 case RCHART('E'):
1827 {
1828 m_quote_fun = 0;
1829 m_bQuoted = 0;
1830 m_nNextPos += 2;
1831 return 0;
1832 }
1833
1834 case 0:
1835 if(m_nNextPos+1 >= m_pattern.GetSize())
1836 {
1837 delta = 1;
1838 nex2 = CHART_INFO(ch , 0, m_nNextPos, delta);
1839 }
1840 else
1841 nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta); // common '\0' char
1842 break;
1843
1844 default:
1845 nex2 = CHART_INFO(ch1, 0, m_nNextPos, delta);
1846 break;
1847 }
1848 }
1849 break;
1850
1851 case RCHART('*'):
1852 case RCHART('+'):
1853 case RCHART('?'):
1854 case RCHART('.'):
1855 case RCHART('{'):
1856 case RCHART('}'):
1857 case RCHART(')'):
1858 case RCHART('|'):
1859 case RCHART('$'):
1860 if(m_nCharsetDepth > 0)
1861 nex2 = CHART_INFO(ch, 0, m_nNextPos, delta);
1862 else
1863 nex2 = CHART_INFO(ch, 1, m_nNextPos, delta);
1864 break;
1865
1866 case RCHART('-'):
1867 if(m_nCharsetDepth > 0)
1868 nex2 = CHART_INFO(ch, 1, m_nNextPos, delta);
1869 else
1870 nex2 = CHART_INFO(ch, 0, m_nNextPos, delta);
1871 break;
1872
1873 case RCHART('('):
1874 {
1875 CHART ch1 = m_pattern[m_nNextPos+1];
1876 CHART ch2 = m_pattern[m_nNextPos+2];
1877
1878 // skip remark
1879 if(ch1 == RCHART('?') && ch2 == RCHART('#'))
1880 {
1881 m_nNextPos += 2;
1882 while(m_nNextPos < m_pattern.GetSize())
1883 {
1884 if(m_pattern[m_nNextPos] == RCHART(')'))
1885 break;
1886
1887 m_nNextPos ++;
1888 }
1889
1890 if(m_pattern[m_nNextPos] == RCHART(')'))
1891 {
1892 m_nNextPos ++;
1893
1894 // get next nex2
1895 return 0;
1896 }
1897 }
1898 else
1899 {
1900 if(m_nCharsetDepth > 0)
1901 nex2 = CHART_INFO(ch, 0, m_nNextPos, delta);
1902 else
1903 nex2 = CHART_INFO(ch, 1, m_nNextPos, delta);
1904 }
1905 }
1906 break;
1907
1908 case RCHART('#'):
1909 if(m_nFlags & EXTENDED)
1910 {
1911 // skip remark
1912 m_nNextPos ++;
1913
1914 while(m_nNextPos < m_pattern.GetSize())
1915 {
1916 if(m_pattern[m_nNextPos] == RCHART('\n') || m_pattern[m_nNextPos] == RCHART('\r'))
1917 break;
1918
1919 m_nNextPos ++;
1920 }
1921
1922 // get next nex2
1923 return 0;
1924 }
1925 else
1926 {
1927 nex2 = CHART_INFO(ch, 0, m_nNextPos, delta);
1928 }
1929 break;
1930
1931 case RCHART(' '):
1932 case RCHART('\f'):
1933 case RCHART('\n'):
1934 case RCHART('\r'):
1935 case RCHART('\t'):
1936 case RCHART('\v'):
1937 if(m_nFlags & EXTENDED)
1938 {
1939 m_nNextPos ++;
1940
1941 // get next nex2
1942 return 0;
1943 }
1944 else
1945 {
1946 nex2 = CHART_INFO(ch, 0, m_nNextPos, delta);
1947 }
1948 break;
1949
1950 case RCHART('['):
1951 m_nCharsetDepth ++;
1952 nex2 = CHART_INFO(ch, 1, m_nNextPos, delta);
1953 break;
1954
1955 case RCHART(']'):
1956 if(m_nCharsetDepth > 0)
1957 {
1958 m_nCharsetDepth --;
1959 nex2 = CHART_INFO(ch, 1, m_nNextPos, delta);
1960 }
1961 else
1962 {
1963 nex2 = CHART_INFO(ch, 0, m_nNextPos, delta);
1964 }
1965 break;
1966
1967 case RCHART(':'):
1968 if(next == CHART_INFO(RCHART('['), 1))
1969 nex2 = CHART_INFO(ch, 1, m_nNextPos, delta);
1970 else
1971 nex2 = CHART_INFO(ch, 0, m_nNextPos, delta);
1972 break;
1973
1974 case RCHART('^'):
1975 if(m_nCharsetDepth == 0 || next == CHART_INFO(RCHART('['), 1) || (curr == CHART_INFO(RCHART('['), 1) && next == CHART_INFO(RCHART(':'), 1)))
1976 nex2 = CHART_INFO(ch, 1, m_nNextPos, delta);
1977 else
1978 nex2 = CHART_INFO(ch, 0, m_nNextPos, delta);
1979 break;
1980
1981 case 0:
1982 if(m_nNextPos >= m_pattern.GetSize())
1983 nex2 = CHART_INFO(ch, 1, m_nNextPos, delta); // end of string
1984 else
1985 nex2 = CHART_INFO(ch, 0, m_nNextPos, delta); // common '\0' char
1986 break;
1987
1988 default:
1989 nex2 = CHART_INFO(ch, 0, m_nNextPos, delta);
1990 break;
1991 }
1992
1993 m_nNextPos += delta;
1994
1995 return 1;
1996 }
1997
GetStockElx(int nStockId)1998 template <class CHART> ElxInterface * CBuilderT <CHART> :: GetStockElx(int nStockId)
1999 {
2000 ElxInterface ** pStockElxs = m_pStockElxs;
2001
2002 // check
2003 if(nStockId < 0 || nStockId >= STOCKELX_COUNT)
2004 return GetStockElx(0);
2005
2006 // create if no
2007 if(pStockElxs[nStockId] == 0)
2008 {
2009 switch(nStockId)
2010 {
2011 case STOCKELX_EMPTY:
2012 pStockElxs[nStockId] = Keep(new CEmptyElx());
2013 break;
2014
2015 case STOCKELX_WORD:
2016 {
2017 CRangeElxT <CHART> * pRange = (CRangeElxT <CHART> *)Keep(new CRangeElxT <CHART> (0, 1));
2018
2019 pRange->m_ranges.Push(RCHART('A')); pRange->m_ranges.Push(RCHART('Z'));
2020 pRange->m_ranges.Push(RCHART('a')); pRange->m_ranges.Push(RCHART('z'));
2021 pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9'));
2022 pRange->m_chars .Push(RCHART('_'));
2023
2024 pStockElxs[nStockId] = pRange;
2025 }
2026 break;
2027
2028 case STOCKELX_WORD_NOT:
2029 {
2030 CRangeElxT <CHART> * pRange = (CRangeElxT <CHART> *)Keep(new CRangeElxT <CHART> (0, 0));
2031
2032 pRange->m_ranges.Push(RCHART('A')); pRange->m_ranges.Push(RCHART('Z'));
2033 pRange->m_ranges.Push(RCHART('a')); pRange->m_ranges.Push(RCHART('z'));
2034 pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9'));
2035 pRange->m_chars .Push(RCHART('_'));
2036
2037 pStockElxs[nStockId] = pRange;
2038 }
2039 break;
2040
2041 case STOCKELX_DOT_ALL:
2042 pStockElxs[nStockId] = Keep(new CRangeElxT <CHART> (0, 0));
2043 break;
2044
2045 case STOCKELX_DOT_NOT_ALL:
2046 {
2047 CRangeElxT <CHART> * pRange = (CRangeElxT <CHART> *)Keep(new CRangeElxT <CHART> (0, 0));
2048
2049 pRange->m_chars .Push(RCHART('\n'));
2050
2051 pStockElxs[nStockId] = pRange;
2052 }
2053 break;
2054
2055 case STOCKELX_SPACE:
2056 {
2057 CRangeElxT <CHART> * pRange = (CRangeElxT <CHART> *)Keep(new CRangeElxT <CHART> (0, 1));
2058
2059 pRange->m_chars .Push(RCHART(' '));
2060 pRange->m_chars .Push(RCHART('\t'));
2061 pRange->m_chars .Push(RCHART('\r'));
2062 pRange->m_chars .Push(RCHART('\n'));
2063
2064 pStockElxs[nStockId] = pRange;
2065 }
2066 break;
2067
2068 case STOCKELX_SPACE_NOT:
2069 {
2070 CRangeElxT <CHART> * pRange = (CRangeElxT <CHART> *)Keep(new CRangeElxT <CHART> (0, 0));
2071
2072 pRange->m_chars .Push(RCHART(' '));
2073 pRange->m_chars .Push(RCHART('\t'));
2074 pRange->m_chars .Push(RCHART('\r'));
2075 pRange->m_chars .Push(RCHART('\n'));
2076
2077 pStockElxs[nStockId] = pRange;
2078 }
2079 break;
2080
2081 case STOCKELX_DIGITAL:
2082 {
2083 CRangeElxT <CHART> * pRange = (CRangeElxT <CHART> *)Keep(new CRangeElxT <CHART> (0, 1));
2084
2085 pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9'));
2086
2087 pStockElxs[nStockId] = pRange;
2088 }
2089 break;
2090
2091 case STOCKELX_DIGITAL_NOT:
2092 {
2093 CRangeElxT <CHART> * pRange = (CRangeElxT <CHART> *)Keep(new CRangeElxT <CHART> (0, 0));
2094
2095 pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9'));
2096
2097 pStockElxs[nStockId] = pRange;
2098 }
2099 break;
2100
2101 case STOCKELX_WORD_RIGHTLEFT:
2102 {
2103 CRangeElxT <CHART> * pRange = (CRangeElxT <CHART> *)Keep(new CRangeElxT <CHART> (1, 1));
2104
2105 pRange->m_ranges.Push(RCHART('A')); pRange->m_ranges.Push(RCHART('Z'));
2106 pRange->m_ranges.Push(RCHART('a')); pRange->m_ranges.Push(RCHART('z'));
2107 pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9'));
2108 pRange->m_chars .Push(RCHART('_'));
2109
2110 pStockElxs[nStockId] = pRange;
2111 }
2112 break;
2113
2114 case STOCKELX_WORD_RIGHTLEFT_NOT:
2115 {
2116 CRangeElxT <CHART> * pRange = (CRangeElxT <CHART> *)Keep(new CRangeElxT <CHART> (1, 0));
2117
2118 pRange->m_ranges.Push(RCHART('A')); pRange->m_ranges.Push(RCHART('Z'));
2119 pRange->m_ranges.Push(RCHART('a')); pRange->m_ranges.Push(RCHART('z'));
2120 pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9'));
2121 pRange->m_chars .Push(RCHART('_'));
2122
2123 pStockElxs[nStockId] = pRange;
2124 }
2125 break;
2126
2127 case STOCKELX_DOT_ALL_RIGHTLEFT:
2128 pStockElxs[nStockId] = Keep(new CRangeElxT <CHART> (1, 0));
2129 break;
2130
2131 case STOCKELX_DOT_NOT_ALL_RIGHTLEFT:
2132 {
2133 CRangeElxT <CHART> * pRange = (CRangeElxT <CHART> *)Keep(new CRangeElxT <CHART> (1, 0));
2134
2135 pRange->m_chars .Push(RCHART('\n'));
2136
2137 pStockElxs[nStockId] = pRange;
2138 }
2139 break;
2140
2141 case STOCKELX_SPACE_RIGHTLEFT:
2142 {
2143 CRangeElxT <CHART> * pRange = (CRangeElxT <CHART> *)Keep(new CRangeElxT <CHART> (1, 1));
2144
2145 pRange->m_chars .Push(RCHART(' '));
2146 pRange->m_chars .Push(RCHART('\t'));
2147 pRange->m_chars .Push(RCHART('\r'));
2148 pRange->m_chars .Push(RCHART('\n'));
2149 pRange->m_chars .Push(RCHART('\f'));
2150 pRange->m_chars .Push(RCHART('\v'));
2151
2152 pStockElxs[nStockId] = pRange;
2153 }
2154 break;
2155
2156 case STOCKELX_SPACE_RIGHTLEFT_NOT:
2157 {
2158 CRangeElxT <CHART> * pRange = (CRangeElxT <CHART> *)Keep(new CRangeElxT <CHART> (1, 0));
2159
2160 pRange->m_chars .Push(RCHART(' '));
2161 pRange->m_chars .Push(RCHART('\t'));
2162 pRange->m_chars .Push(RCHART('\r'));
2163 pRange->m_chars .Push(RCHART('\n'));
2164 pRange->m_chars .Push(RCHART('\f'));
2165 pRange->m_chars .Push(RCHART('\v'));
2166
2167 pStockElxs[nStockId] = pRange;
2168 }
2169 break;
2170
2171 case STOCKELX_DIGITAL_RIGHTLEFT:
2172 {
2173 CRangeElxT <CHART> * pRange = (CRangeElxT <CHART> *)Keep(new CRangeElxT <CHART> (1, 1));
2174
2175 pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9'));
2176
2177 pStockElxs[nStockId] = pRange;
2178 }
2179 break;
2180
2181 case STOCKELX_DIGITAL_RIGHTLEFT_NOT:
2182 {
2183 CRangeElxT <CHART> * pRange = (CRangeElxT <CHART> *)Keep(new CRangeElxT <CHART> (1, 0));
2184
2185 pRange->m_ranges.Push(RCHART('0')); pRange->m_ranges.Push(RCHART('9'));
2186
2187 pStockElxs[nStockId] = pRange;
2188 }
2189 break;
2190 }
2191 }
2192
2193 // return
2194 return pStockElxs[nStockId];
2195 }
2196
BuildAlternative(int vaflags)2197 template <class CHART> ElxInterface * CBuilderT <CHART> :: BuildAlternative(int vaflags)
2198 {
2199 if(curr == CHART_INFO(0, 1))
2200 return GetStockElx(STOCKELX_EMPTY);
2201
2202 // flag instance
2203 int flags = vaflags;
2204
2205 // first part
2206 ElxInterface * pAlternativeOne = BuildList(flags);
2207
2208 // check alternative
2209 if(curr == CHART_INFO(RCHART('|'), 1))
2210 {
2211 CAlternativeElx * pAlternative = (CAlternativeElx *)Keep(new CAlternativeElx());
2212 pAlternative->m_elxlist.Push(pAlternativeOne);
2213
2214 // loop
2215 while(curr == CHART_INFO(RCHART('|'), 1))
2216 {
2217 // skip '|' itself
2218 MoveNext();
2219
2220 pAlternativeOne = BuildList(flags);
2221 pAlternative->m_elxlist.Push(pAlternativeOne);
2222 }
2223
2224 return pAlternative;
2225 }
2226
2227 return pAlternativeOne;
2228 }
2229
BuildList(int & flags)2230 template <class CHART> ElxInterface * CBuilderT <CHART> :: BuildList(int & flags)
2231 {
2232 if(curr == CHART_INFO(0, 1) || curr == CHART_INFO(RCHART('|'), 1) || curr == CHART_INFO(RCHART(')'), 1))
2233 return GetStockElx(STOCKELX_EMPTY);
2234
2235 // first
2236 ElxInterface * pListOne = BuildRepeat(flags);
2237
2238 if(curr != CHART_INFO(0, 1) && curr != CHART_INFO(RCHART('|'), 1) && curr != CHART_INFO(RCHART(')'), 1))
2239 {
2240 CListElx * pList = (CListElx *)Keep(new CListElx(flags & RIGHTTOLEFT));
2241 pList->m_elxlist.Push(pListOne);
2242
2243 while(curr != CHART_INFO(0, 1) && curr != CHART_INFO(RCHART('|'), 1) && curr != CHART_INFO(RCHART(')'), 1))
2244 {
2245 pListOne = BuildRepeat(flags);
2246
2247 // add
2248 pList->m_elxlist.Push(pListOne);
2249 }
2250
2251 return pList;
2252 }
2253
2254 return pListOne;
2255 }
2256
BuildRepeat(int & flags)2257 template <class CHART> ElxInterface * CBuilderT <CHART> :: BuildRepeat(int & flags)
2258 {
2259 // simple
2260 ElxInterface * pSimple = BuildSimple(flags);
2261
2262 if(curr.type == 0) return pSimple;
2263
2264 // is quantifier or not
2265 int bIsQuantifier = 1;
2266
2267 // quantifier range
2268 unsigned int nMin = 0, nMax = 0;
2269
2270 switch(curr.ch)
2271 {
2272 case RCHART('{'):
2273 {
2274 CBufferT <char> re;
2275
2276 // skip '{'
2277 MoveNext();
2278
2279 // copy
2280 while(curr != CHART_INFO(0, 1) && curr != CHART_INFO(RCHART('}'), 1))
2281 {
2282 re.Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1);
2283 MoveNext();
2284 }
2285
2286 // skip '}'
2287 MoveNext();
2288
2289 // read
2290 int red;
2291 char * str = re.GetBuffer();
2292
2293 if( ! ReadDec(str, nMin) )
2294 red = 0;
2295 else if( *str != ',' )
2296 red = 1;
2297 else
2298 {
2299 str ++;
2300
2301 if( ! ReadDec(str, nMax) )
2302 red = 2;
2303 else
2304 red = 3;
2305 }
2306
2307 // check
2308 if(red <= 1 ) nMax = nMin;
2309 if(red == 2 ) nMax = INT_MAX;
2310 if(nMax < nMin) nMax = nMin;
2311 }
2312 break;
2313
2314 case RCHART('?'):
2315 nMin = 0;
2316 nMax = 1;
2317
2318 // skip '?'
2319 MoveNext();
2320 break;
2321
2322 case RCHART('*'):
2323 nMin = 0;
2324 nMax = INT_MAX;
2325
2326 // skip '*'
2327 MoveNext();
2328 break;
2329
2330 case RCHART('+'):
2331 nMin = 1;
2332 nMax = INT_MAX;
2333
2334 // skip '+'
2335 MoveNext();
2336 break;
2337
2338 default:
2339 bIsQuantifier = 0;
2340 break;
2341 }
2342
2343 // do quantify
2344 if(bIsQuantifier)
2345 {
2346 // 0 times
2347 if(nMax == 0)
2348 return GetStockElx(STOCKELX_EMPTY);
2349
2350 // fixed times
2351 if(nMin == nMax)
2352 {
2353 if(curr == CHART_INFO(RCHART('?'), 1) || curr == CHART_INFO(RCHART('+'), 1))
2354 MoveNext();
2355
2356 return Keep(new CRepeatElx(pSimple, nMin));
2357 }
2358
2359 // range times
2360 if(curr == CHART_INFO(RCHART('?'), 1))
2361 {
2362 MoveNext();
2363 return Keep(new CReluctantElx(pSimple, nMin, nMax));
2364 }
2365 else if(curr == CHART_INFO(RCHART('+'), 1))
2366 {
2367 MoveNext();
2368 return Keep(new CPossessiveElx(pSimple, nMin, nMax));
2369 }
2370 else
2371 {
2372 return Keep(new CGreedyElx(pSimple, nMin, nMax));
2373 }
2374 }
2375
2376 return pSimple;
2377 }
2378
BuildSimple(int & flags)2379 template <class CHART> ElxInterface * CBuilderT <CHART> :: BuildSimple(int & flags)
2380 {
2381 CBufferT <CHART> fixed;
2382
2383 while(curr != CHART_INFO(0, 1))
2384 {
2385 if(curr.type == 0)
2386 {
2387 if(next == CHART_INFO(RCHART('{'), 1) || next == CHART_INFO(RCHART('?'), 1) || next == CHART_INFO(RCHART('*'), 1) || next == CHART_INFO(RCHART('+'), 1))
2388 {
2389 if(fixed.GetSize() == 0)
2390 {
2391 fixed.Append(curr.ch, 1);
2392 MoveNext();
2393 }
2394
2395 break;
2396 }
2397 else
2398 {
2399 fixed.Append(curr.ch, 1);
2400 MoveNext();
2401 }
2402 }
2403 else if(curr.type == 1)
2404 {
2405 CHART vch = curr.ch;
2406
2407 // end of simple
2408 if(vch == RCHART(')') || vch == RCHART('|'))
2409 break;
2410
2411 // has fixed already
2412 if(fixed.GetSize() > 0)
2413 break;
2414
2415 // left parentheses
2416 if(vch == RCHART('('))
2417 {
2418 return BuildRecursive(flags);
2419 }
2420
2421 // char set
2422 if( vch == RCHART('[') || vch == RCHART('.') || vch == RCHART('w') || vch == RCHART('W') ||
2423 vch == RCHART('s') || vch == RCHART('S') || vch == RCHART('d') || vch == RCHART('D')
2424 )
2425 {
2426 return BuildCharset(flags);
2427 }
2428
2429 // boundary
2430 if( vch == RCHART('^') || vch == RCHART('$') || vch == RCHART('A') || vch == RCHART('Z') ||
2431 vch == RCHART('b') || vch == RCHART('B') || vch == RCHART('G') // vch == RCHART('<') || vch == RCHART('>')
2432 )
2433 {
2434 return BuildBoundary(flags);
2435 }
2436
2437 // backref
2438 if(vch == RCHART('\\') || vch == RCHART('k') || vch == RCHART('g'))
2439 {
2440 return BuildBackref(flags);
2441 }
2442
2443 // treat vchar as char
2444 fixed.Append(curr.ch, 1);
2445 MoveNext();
2446 }
2447 }
2448
2449 if(fixed.GetSize() > 0)
2450 return Keep(new CStringElxT <CHART> (fixed.GetBuffer(), fixed.GetSize(), flags & RIGHTTOLEFT, flags & IGNORECASE));
2451 else
2452 return GetStockElx(STOCKELX_EMPTY);
2453 }
2454
BuildCharset(int & flags)2455 template <class CHART> ElxInterface * CBuilderT <CHART> :: BuildCharset(int & flags)
2456 {
2457 // char
2458 CHART ch = curr.ch;
2459
2460 // skip
2461 MoveNext();
2462
2463 switch(ch)
2464 {
2465 case RCHART('.'):
2466 return GetStockElx(
2467 flags & RIGHTTOLEFT ?
2468 ((flags & SINGLELINE) ? STOCKELX_DOT_ALL_RIGHTLEFT : STOCKELX_DOT_NOT_ALL_RIGHTLEFT) :
2469 ((flags & SINGLELINE) ? STOCKELX_DOT_ALL : STOCKELX_DOT_NOT_ALL)
2470 );
2471
2472 case RCHART('w'):
2473 return GetStockElx(flags & RIGHTTOLEFT ? STOCKELX_WORD_RIGHTLEFT : STOCKELX_WORD);
2474
2475 case RCHART('W'):
2476 return GetStockElx(flags & RIGHTTOLEFT ? STOCKELX_WORD_RIGHTLEFT_NOT : STOCKELX_WORD_NOT);
2477
2478 case RCHART('s'):
2479 return GetStockElx(flags & RIGHTTOLEFT ? STOCKELX_SPACE_RIGHTLEFT : STOCKELX_SPACE);
2480
2481 case RCHART('S'):
2482 return GetStockElx(flags & RIGHTTOLEFT ? STOCKELX_SPACE_RIGHTLEFT_NOT : STOCKELX_SPACE_NOT);
2483
2484 case RCHART('d'):
2485 return GetStockElx(flags & RIGHTTOLEFT ? STOCKELX_DIGITAL_RIGHTLEFT : STOCKELX_DIGITAL);
2486
2487 case RCHART('D'):
2488 return GetStockElx(flags & RIGHTTOLEFT ? STOCKELX_DIGITAL_RIGHTLEFT_NOT : STOCKELX_DIGITAL_NOT);
2489
2490 case RCHART('['):
2491 {
2492 CRangeElxT <CHART> * pRange;
2493
2494 // create
2495 if(curr == CHART_INFO(RCHART(':'), 1))
2496 {
2497 CBufferT <char> posix;
2498
2499 do {
2500 posix.Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1);
2501 MoveNext();
2502 }
2503 while(curr.ch != RCHART(0) && curr != CHART_INFO(RCHART(']'), 1));
2504
2505 MoveNext(); // skip ']'
2506
2507 // posix
2508 return Keep(new CPosixElxT <CHART> (posix.GetBuffer(), flags & RIGHTTOLEFT));
2509 }
2510 else if(curr == CHART_INFO(RCHART('^'), 1))
2511 {
2512 MoveNext(); // skip '^'
2513 pRange = (CRangeElxT <CHART> *)Keep(new CRangeElxT <CHART> (flags & RIGHTTOLEFT, 0));
2514 }
2515 else
2516 {
2517 pRange = (CRangeElxT <CHART> *)Keep(new CRangeElxT <CHART> (flags & RIGHTTOLEFT, 1));
2518 }
2519
2520 // parse
2521 while(curr != CHART_INFO(0, 1) && curr != CHART_INFO(RCHART(']'), 1))
2522 {
2523 ch = curr.ch;
2524
2525 if(curr.type == 1 && (
2526 ch == RCHART('.') || ch == RCHART('w') || ch == RCHART('W') || ch == RCHART('s') || ch == RCHART('S') || ch == RCHART('d') || ch == RCHART('D') ||
2527 (ch == RCHART('[') && next == CHART_INFO(RCHART(':'), 1))
2528 ))
2529 {
2530 pRange->m_embeds.Push(BuildCharset(flags));
2531 }
2532 else if(next == CHART_INFO(RCHART('-'), 1) && nex2.type == 0)
2533 {
2534 pRange->m_ranges.Push(ch); pRange->m_ranges.Push(nex2.ch);
2535
2536 // next
2537 MoveNext();
2538 MoveNext();
2539 MoveNext();
2540 }
2541 else
2542 {
2543 pRange->m_chars.Push(ch);
2544
2545 // next
2546 MoveNext();
2547 }
2548 }
2549
2550 // skip ']'
2551 MoveNext();
2552
2553 return pRange;
2554 }
2555 }
2556
2557 return GetStockElx(STOCKELX_EMPTY);
2558 }
2559
BuildRecursive(int & flags)2560 template <class CHART> ElxInterface * CBuilderT <CHART> :: BuildRecursive(int & flags)
2561 {
2562 // skip '('
2563 MoveNext();
2564
2565 if(curr == CHART_INFO(RCHART('?'), 1))
2566 {
2567 ElxInterface * pElx = 0;
2568
2569 // skip '?'
2570 MoveNext();
2571
2572 int bNegative = 0;
2573 CHART named_end = RCHART('>');
2574
2575 switch(curr.ch)
2576 {
2577 case RCHART('!'):
2578 bNegative = 1;
2579
2580 case RCHART('='):
2581 {
2582 MoveNext(); // skip '!' or '='
2583 pElx = Keep(new CAssertElx(BuildAlternative(flags & ~RIGHTTOLEFT), !bNegative));
2584 }
2585 break;
2586
2587 case RCHART('<'):
2588 switch(next.ch)
2589 {
2590 case RCHART('!'):
2591 bNegative = 1;
2592
2593 case RCHART('='):
2594 MoveNext(); // skip '<'
2595 MoveNext(); // skip '!' or '='
2596 {
2597 pElx = Keep(new CAssertElx(BuildAlternative(flags | RIGHTTOLEFT), !bNegative));
2598 }
2599 break;
2600
2601 default: // named group
2602 break;
2603 }
2604 // break if assertion // else named
2605 if(pElx != 0) break;
2606
2607 case RCHART('P'):
2608 if(curr.ch == RCHART('P')) MoveNext(); // skip 'P'
2609
2610 case RCHART('\''):
2611 if (curr.ch == RCHART('<' )) named_end = RCHART('>' );
2612 else if(curr.ch == RCHART('\'')) named_end = RCHART('\'');
2613 MoveNext(); // skip '<' or '\''
2614 {
2615 // named number
2616 int nThisBackref = m_nNextNamed ++;
2617
2618 CListElx * pList = (CListElx *)Keep(new CListElx(flags & RIGHTTOLEFT));
2619 CBracketElx * pleft = (CBracketElx *)Keep(new CBracketElx(-1, flags & RIGHTTOLEFT ? 1 : 0));
2620 CBracketElx * pright = (CBracketElx *)Keep(new CBracketElx(-1, flags & RIGHTTOLEFT ? 0 : 1));
2621
2622 // save name
2623 CBufferT <CHART> & name = pleft->m_szNamed;
2624 CBufferT <char> num;
2625
2626 while(curr.ch != RCHART(0) && curr.ch != named_end)
2627 {
2628 name.Append(curr.ch, 1);
2629 num .Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1);
2630 MoveNext();
2631 }
2632 MoveNext(); // skip '>' or '\''
2633
2634 // check <num>
2635 unsigned int number;
2636 char * str = num.GetBuffer();
2637
2638 if( ReadDec(str, number) ? ( *str == '\0') : 0 )
2639 {
2640 pleft ->m_nnumber = number;
2641 pright->m_nnumber = number;
2642
2643 name.Release();
2644 }
2645
2646 // left, center, right
2647 pList->m_elxlist.Push(pleft);
2648 pList->m_elxlist.Push(BuildAlternative(flags));
2649 pList->m_elxlist.Push(pright);
2650
2651 // for recursive
2652 m_namedlist.Prepare(nThisBackref);
2653 m_namedlist[nThisBackref] = pList;
2654
2655 pElx = pList;
2656 }
2657 break;
2658
2659 case RCHART('>'):
2660 {
2661 MoveNext(); // skip '>'
2662 pElx = Keep(new CIndependentElx(BuildAlternative(flags)));
2663 }
2664 break;
2665
2666 case RCHART('R'):
2667 MoveNext(); // skip 'R'
2668 while(curr.ch != RCHART(0) && isspace(curr.ch)) MoveNext(); // skip space
2669
2670 if(curr.ch == RCHART('<') || curr.ch == RCHART('\''))
2671 {
2672 named_end = curr.ch == RCHART('<') ? RCHART('>') : RCHART('\'');
2673 CDelegateElx * pDelegate = (CDelegateElx *)Keep(new CDelegateElx(-3));
2674
2675 MoveNext(); // skip '<' or '\\'
2676
2677 // save name
2678 CBufferT <CHART> & name = pDelegate->m_szNamed;
2679 CBufferT <char> num;
2680
2681 while(curr.ch != RCHART(0) && curr.ch != named_end)
2682 {
2683 name.Append(curr.ch, 1);
2684 num .Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1);
2685 MoveNext();
2686 }
2687 MoveNext(); // skip '>' or '\''
2688
2689 // check <num>
2690 unsigned int number;
2691 char * str = num.GetBuffer();
2692
2693 if( ReadDec(str, number) ? ( *str == '\0') : 0 )
2694 {
2695 pDelegate->m_ndata = number;
2696 name.Release();
2697 }
2698
2699 m_recursivelist.Push(pDelegate);
2700 pElx = pDelegate;
2701 }
2702 else
2703 {
2704 CBufferT <char> rto;
2705 while(curr.ch != RCHART(0) && curr.ch != RCHART(')'))
2706 {
2707 rto.Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1);
2708 MoveNext();
2709 }
2710
2711 unsigned int rtono = 0;
2712 char * str = rto.GetBuffer();
2713 ReadDec(str, rtono);
2714
2715 CDelegateElx * pDelegate = (CDelegateElx *)Keep(new CDelegateElx(rtono));
2716
2717 m_recursivelist.Push(pDelegate);
2718 pElx = pDelegate;
2719 }
2720 break;
2721
2722 case RCHART('('):
2723 {
2724 CConditionElx * pConditionElx = (CConditionElx *)Keep(new CConditionElx());
2725
2726 // condition
2727 ElxInterface * & pCondition = pConditionElx->m_pelxask;
2728
2729 if(next == CHART_INFO(RCHART('?'), 1))
2730 {
2731 pCondition = BuildRecursive(flags);
2732 }
2733 else // named, assert or number
2734 {
2735 MoveNext(); // skip '('
2736 int pos0 = curr.pos;
2737
2738 // save elx condition
2739 pCondition = Keep(new CAssertElx(BuildAlternative(flags), 1));
2740
2741 // save name
2742 pConditionElx->m_szNamed.Append(m_pattern.GetBuffer() + pos0, curr.pos - pos0, 1);
2743
2744 // save number
2745 CBufferT <char> numstr;
2746 while(pos0 < curr.pos)
2747 {
2748 CHART ch = m_pattern[pos0];
2749 numstr.Append(((ch & (CHART)0xff) == ch) ? (char)ch : 0, 1);
2750 pos0 ++;
2751 }
2752
2753 unsigned int number;
2754 char * str = numstr.GetBuffer();
2755
2756 // valid group number
2757 if( ReadDec(str, number) ? ( *str == '\0') : 0 )
2758 {
2759 pConditionElx->m_nnumber = number;
2760 pCondition = 0;
2761 }
2762 else // maybe elx, maybe named
2763 {
2764 pConditionElx->m_nnumber = -1;
2765 m_namedconditionlist.Push(pConditionElx);
2766 }
2767
2768 MoveNext(); // skip ')'
2769 }
2770
2771 // alternative
2772 {
2773 int newflags = flags;
2774
2775 pConditionElx->m_pelxyes = BuildList(newflags);
2776 }
2777
2778 if(curr.ch == RCHART('|'))
2779 {
2780 MoveNext(); // skip '|'
2781
2782 pConditionElx->m_pelxno = BuildAlternative(flags);
2783 }
2784 else
2785 {
2786 pConditionElx->m_pelxno = 0;
2787 }
2788
2789 pElx = pConditionElx;
2790 }
2791 break;
2792
2793 default:
2794 while(curr.ch != RCHART(0) && isspace(curr.ch)) MoveNext(); // skip space
2795
2796 if(curr.ch >= RCHART('0') && curr.ch <= RCHART('9')) // recursive (?1) => (?R1)
2797 {
2798 CBufferT <char> rto;
2799 while(curr.ch != RCHART(0) && curr.ch != RCHART(')'))
2800 {
2801 rto.Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1);
2802 MoveNext();
2803 }
2804
2805 unsigned int rtono = 0;
2806 char * str = rto.GetBuffer();
2807 ReadDec(str, rtono);
2808
2809 CDelegateElx * pDelegate = (CDelegateElx *)Keep(new CDelegateElx(rtono));
2810
2811 m_recursivelist.Push(pDelegate);
2812 pElx = pDelegate;
2813 }
2814 else
2815 {
2816 // flag
2817 int newflags = flags;
2818 while(curr != CHART_INFO(0, 1) && curr.ch != RCHART(':') && curr.ch != RCHART(')') && curr != CHART_INFO(RCHART('('), 1))
2819 {
2820 int tochange = 0;
2821
2822 switch(curr.ch)
2823 {
2824 case RCHART('i'):
2825 case RCHART('I'):
2826 tochange = IGNORECASE;
2827 break;
2828
2829 case RCHART('s'):
2830 case RCHART('S'):
2831 tochange = SINGLELINE;
2832 break;
2833
2834 case RCHART('m'):
2835 case RCHART('M'):
2836 tochange = MULTILINE;
2837 break;
2838
2839 case RCHART('g'):
2840 case RCHART('G'):
2841 tochange = GLOBAL;
2842 break;
2843
2844 case RCHART('-'):
2845 bNegative = 1;
2846 break;
2847 }
2848
2849 if(bNegative)
2850 newflags &= ~tochange;
2851 else
2852 newflags |= tochange;
2853
2854 // move to next char
2855 MoveNext();
2856 }
2857
2858 if(curr.ch == RCHART(':') || curr == CHART_INFO(RCHART('('), 1))
2859 {
2860 // skip ':'
2861 if(curr.ch == RCHART(':')) MoveNext();
2862
2863 pElx = BuildAlternative(newflags);
2864 }
2865 else
2866 {
2867 // change parent flags
2868 flags = newflags;
2869
2870 pElx = GetStockElx(STOCKELX_EMPTY);
2871 }
2872 }
2873 break;
2874 }
2875
2876 MoveNext(); // skip ')'
2877
2878 return pElx;
2879 }
2880 else
2881 {
2882 // group and number
2883 CListElx * pList = (CListElx *)Keep(new CListElx(flags & RIGHTTOLEFT));
2884 int nThisBackref = ++ m_nMaxNumber;
2885
2886 // left, center, right
2887 pList->m_elxlist.Push(Keep(new CBracketElx(nThisBackref, flags & RIGHTTOLEFT ? 1 : 0)));
2888 pList->m_elxlist.Push(BuildAlternative(flags));
2889 pList->m_elxlist.Push(Keep(new CBracketElx(nThisBackref, flags & RIGHTTOLEFT ? 0 : 1)));
2890
2891 // for recursive
2892 m_grouplist.Prepare(nThisBackref);
2893 m_grouplist[nThisBackref] = pList;
2894
2895 // right
2896 MoveNext(); // skip ')'
2897
2898 return pList;
2899 }
2900 }
2901
BuildBoundary(int & flags)2902 template <class CHART> ElxInterface * CBuilderT <CHART> :: BuildBoundary(int & flags)
2903 {
2904 // char
2905 CHART ch = curr.ch;
2906
2907 // skip
2908 MoveNext();
2909
2910 switch(ch)
2911 {
2912 case RCHART('^'):
2913 return Keep(new CBoundaryElxT <CHART> ((flags & MULTILINE) ? BOUNDARY_LINE_BEGIN : BOUNDARY_FILE_BEGIN));
2914
2915 case RCHART('$'):
2916 return Keep(new CBoundaryElxT <CHART> ((flags & MULTILINE) ? BOUNDARY_LINE_END : BOUNDARY_FILE_END));
2917
2918 case RCHART('b'):
2919 return Keep(new CBoundaryElxT <CHART> (BOUNDARY_WORD_EDGE));
2920
2921 case RCHART('B'):
2922 return Keep(new CBoundaryElxT <CHART> (BOUNDARY_WORD_EDGE, 0));
2923
2924 case RCHART('A'):
2925 return Keep(new CBoundaryElxT <CHART> (BOUNDARY_FILE_BEGIN));
2926
2927 case RCHART('Z'):
2928 return Keep(new CBoundaryElxT <CHART> (BOUNDARY_FILE_END));
2929
2930 case RCHART('G'):
2931 if(flags & GLOBAL)
2932 return Keep(new CGlobalElx());
2933 else
2934 return GetStockElx(STOCKELX_EMPTY);
2935
2936 default:
2937 return GetStockElx(STOCKELX_EMPTY);
2938 }
2939 }
2940
BuildBackref(int & flags)2941 template <class CHART> ElxInterface * CBuilderT <CHART> :: BuildBackref(int & flags)
2942 {
2943 // skip '\\' or '\k' or '\g'
2944 MoveNext();
2945
2946 if(curr.ch == RCHART('<') || curr.ch == RCHART('\''))
2947 {
2948 CHART named_end = curr.ch == RCHART('<') ? RCHART('>') : RCHART('\'');
2949 CBackrefElxT <CHART> * pbackref = (CBackrefElxT <CHART> *)Keep(new CBackrefElxT <CHART> (-1, flags & RIGHTTOLEFT, flags & IGNORECASE));
2950
2951 MoveNext(); // skip '<' or '\''
2952
2953 // save name
2954 CBufferT <CHART> & name = pbackref->m_szNamed;
2955 CBufferT <char> num;
2956
2957 while(curr.ch != RCHART(0) && curr.ch != named_end)
2958 {
2959 name.Append(curr.ch, 1);
2960 num .Append(((curr.ch & (CHART)0xff) == curr.ch) ? (char)curr.ch : 0, 1);
2961 MoveNext();
2962 }
2963 MoveNext(); // skip '>' or '\''
2964
2965 // check <num>
2966 unsigned int number;
2967 char * str = num.GetBuffer();
2968
2969 if( ReadDec(str, number) ? ( *str == '\0') : 0 )
2970 {
2971 pbackref->m_nnumber = number;
2972 name.Release();
2973 }
2974 else
2975 {
2976 m_namedbackreflist.Push(pbackref);
2977 }
2978
2979 return pbackref;
2980 }
2981 else
2982 {
2983 unsigned int nbackref = 0;
2984
2985 for(int i=0; i<3; i++)
2986 {
2987 if(curr.ch >= RCHART('0') && curr.ch <= RCHART('9'))
2988 nbackref = nbackref * 10 + (curr.ch - RCHART('0'));
2989 else
2990 break;
2991
2992 MoveNext();
2993 }
2994
2995 return Keep(new CBackrefElxT <CHART> (nbackref, flags & RIGHTTOLEFT, flags & IGNORECASE));
2996 }
2997 }
2998
ReadDec(char * & str,unsigned int & dec)2999 template <class CHART> int CBuilderT <CHART> :: ReadDec(char * & str, unsigned int & dec)
3000 {
3001 int s = 0;
3002 while(str[s] != 0 && isspace(str[s])) s++;
3003
3004 if(str[s] < '0' || str[s] > '9') return 0;
3005
3006 dec = 0;
3007 unsigned int i;
3008
3009 for(i = s; i<sizeof(CHART)*3 + s; i++)
3010 {
3011 if(str[i] >= '0' && str[i] <= '9')
3012 dec = dec * 10 + (str[i] - '0');
3013 else
3014 break;
3015 }
3016
3017 while(str[i] != 0 && isspace(str[i])) i++;
3018 str += i;
3019
3020 return 1;
3021 }
3022
3023 //
3024 // Regexp
3025 //
3026 template <class CHART> class CRegexpT
3027 {
3028 public:
3029 CRegexpT(const CHART * pattern = 0, int flags = 0);
3030 CRegexpT(const CHART * pattern, int length, int flags);
3031 void Compile(const CHART * pattern, int flags = 0);
3032 void Compile(const CHART * pattern, int length, int flags);
3033
3034 public:
3035 MatchResult MatchExact(const CHART * tstring, CContext * pContext = 0) const;
3036 MatchResult MatchExact(const CHART * tstring, int length, CContext * pContext = 0) const;
3037 MatchResult Match(const CHART * tstring, int start = -1, CContext * pContext = 0) const;
3038 MatchResult Match(const CHART * tstring, int length, int start, CContext * pContext = 0) const;
3039 MatchResult Match(CContext * pContext) const;
3040 CContext * PrepareMatch(const CHART * tstring, int start = -1, CContext * pContext = 0) const;
3041 CContext * PrepareMatch(const CHART * tstring, int length, int start, CContext * pContext = 0) const;
3042 CHART * Replace(const CHART * tstring, const CHART * replaceto, int start = -1, int ntimes = -1, MatchResult * result = 0, CContext * pContext = 0) const;
3043 CHART * Replace(const CHART * tstring, int string_length, const CHART * replaceto, int to_length, int & result_length, int start = -1, int ntimes = -1, MatchResult * result = 0, CContext * pContext = 0) const;
3044 int GetNamedGroupNumber(const CHART * group_name) const;
3045
3046 public:
3047 static void ReleaseString (CHART * tstring );
3048 static void ReleaseContext(CContext * pContext);
3049
3050 public:
3051 CBuilderT <CHART> m_builder;
3052 };
3053
3054 //
3055 // Implementation
3056 //
CRegexpT(const CHART * pattern,int flags)3057 template <class CHART> CRegexpT <CHART> :: CRegexpT(const CHART * pattern, int flags)
3058 {
3059 Compile(pattern, CBufferRefT<CHART>(pattern).GetSize(), flags);
3060 }
3061
CRegexpT(const CHART * pattern,int length,int flags)3062 template <class CHART> CRegexpT <CHART> :: CRegexpT(const CHART * pattern, int length, int flags)
3063 {
3064 Compile(pattern, length, flags);
3065 }
3066
Compile(const CHART * pattern,int flags)3067 template <class CHART> inline void CRegexpT <CHART> :: Compile(const CHART * pattern, int flags)
3068 {
3069 Compile(pattern, CBufferRefT<CHART>(pattern).GetSize(), flags);
3070 }
3071
Compile(const CHART * pattern,int length,int flags)3072 template <class CHART> void CRegexpT <CHART> :: Compile(const CHART * pattern, int length, int flags)
3073 {
3074 m_builder.Clear();
3075 if(pattern != 0) m_builder.Build(CBufferRefT<CHART>(pattern, length), flags);
3076 }
3077
MatchExact(const CHART * tstring,CContext * pContext)3078 template <class CHART> inline MatchResult CRegexpT <CHART> :: MatchExact(const CHART * tstring, CContext * pContext) const
3079 {
3080 return MatchExact(tstring, CBufferRefT<CHART>(tstring).GetSize(), pContext);
3081 }
3082
MatchExact(const CHART * tstring,int length,CContext * pContext)3083 template <class CHART> MatchResult CRegexpT <CHART> :: MatchExact(const CHART * tstring, int length, CContext * pContext) const
3084 {
3085 if(m_builder.m_pTopElx == 0)
3086 return 0;
3087
3088 // info
3089 int endpos = 0;
3090
3091 CContext context;
3092 if(pContext == 0) pContext = &context;
3093
3094 pContext->m_stack.Restore(0);
3095 pContext->m_capturestack.Restore(0);
3096 pContext->m_captureindex.Restore(0);
3097
3098 pContext->m_nParenZindex = 0;
3099 pContext->m_nLastBeginPos = -1;
3100 pContext->m_pMatchString = (void*)tstring;
3101 pContext->m_pMatchStringLength = length;
3102
3103 if(m_builder.m_nFlags & RIGHTTOLEFT)
3104 {
3105 pContext->m_nBeginPos = length;
3106 pContext->m_nCurrentPos = length;
3107 endpos = 0;
3108 }
3109 else
3110 {
3111 pContext->m_nBeginPos = 0;
3112 pContext->m_nCurrentPos = 0;
3113 endpos = length;
3114 }
3115
3116 pContext->m_captureindex.Prepare(m_builder.m_nMaxNumber, -1);
3117 pContext->m_captureindex[0] = 0;
3118 pContext->m_capturestack.Push(0);
3119 pContext->m_capturestack.Push(pContext->m_nCurrentPos);
3120 pContext->m_capturestack.Push(-1);
3121 pContext->m_capturestack.Push(-1);
3122
3123 // match
3124 if( ! m_builder.m_pTopElx->Match( pContext ) )
3125 return 0;
3126 else
3127 {
3128 while( pContext->m_nCurrentPos != endpos )
3129 {
3130 if( ! m_builder.m_pTopElx->MatchNext( pContext ) )
3131 return 0;
3132 else
3133 {
3134 if( pContext->m_nLastBeginPos == pContext->m_nBeginPos && pContext->m_nBeginPos == pContext->m_nCurrentPos )
3135 return 0;
3136 else
3137 pContext->m_nLastBeginPos = pContext->m_nCurrentPos;
3138 }
3139 }
3140
3141 // end pos
3142 pContext->m_capturestack[2] = pContext->m_nCurrentPos;
3143
3144 return MatchResult( pContext, m_builder.m_nMaxNumber );
3145 }
3146 }
3147
Match(const CHART * tstring,int start,CContext * pContext)3148 template <class CHART> MatchResult CRegexpT <CHART> :: Match(const CHART * tstring, int start, CContext * pContext) const
3149 {
3150 return Match(tstring, CBufferRefT<CHART>(tstring).GetSize(), start, pContext);
3151 }
3152
Match(const CHART * tstring,int length,int start,CContext * pContext)3153 template <class CHART> MatchResult CRegexpT <CHART> :: Match(const CHART * tstring, int length, int start, CContext * pContext) const
3154 {
3155 if(m_builder.m_pTopElx == 0)
3156 return 0;
3157
3158 CContext context;
3159 if(pContext == 0) pContext = &context;
3160
3161 pContext->m_nParenZindex = 0;
3162 pContext->m_nLastBeginPos = -1;
3163 pContext->m_pMatchString = (void*)tstring;
3164 pContext->m_pMatchStringLength = length;
3165
3166 if(start < 0)
3167 {
3168 if(m_builder.m_nFlags & RIGHTTOLEFT)
3169 {
3170 pContext->m_nBeginPos = length;
3171 pContext->m_nCurrentPos = length;
3172 }
3173 else
3174 {
3175 pContext->m_nBeginPos = 0;
3176 pContext->m_nCurrentPos = 0;
3177 }
3178 }
3179 else
3180 {
3181 pContext->m_nBeginPos = start;
3182 pContext->m_nCurrentPos = start;
3183 }
3184
3185 return Match( pContext );
3186 }
3187
Match(CContext * pContext)3188 template <class CHART> MatchResult CRegexpT <CHART> :: Match(CContext * pContext) const
3189 {
3190 if(m_builder.m_pTopElx == 0)
3191 return 0;
3192
3193 int endpos, delta;
3194
3195 if(m_builder.m_nFlags & RIGHTTOLEFT)
3196 {
3197 endpos = -1;
3198 delta = -1;
3199 }
3200 else
3201 {
3202 endpos = pContext->m_pMatchStringLength + 1;
3203 delta = 1;
3204 }
3205
3206 while(pContext->m_nCurrentPos != endpos)
3207 {
3208 pContext->m_captureindex.Restore(0);
3209 pContext->m_stack .Restore(0);
3210 pContext->m_capturestack.Restore(0);
3211
3212 pContext->m_captureindex.Prepare(m_builder.m_nMaxNumber, -1);
3213 pContext->m_captureindex[0] = 0;
3214 pContext->m_capturestack.Push(0);
3215 pContext->m_capturestack.Push(pContext->m_nCurrentPos);
3216 pContext->m_capturestack.Push(-1);
3217 pContext->m_capturestack.Push(-1);
3218
3219 if( m_builder.m_pTopElx->Match( pContext ) )
3220 {
3221 // zero width
3222 if( pContext->m_nLastBeginPos == pContext->m_nBeginPos && pContext->m_nBeginPos == pContext->m_nCurrentPos )
3223 {
3224 pContext->m_nCurrentPos += delta;
3225 continue;
3226 }
3227
3228 // save pos
3229 pContext->m_nLastBeginPos = pContext->m_nBeginPos;
3230 pContext->m_nBeginPos = pContext->m_nCurrentPos;
3231 pContext->m_capturestack[2] = pContext->m_nCurrentPos;
3232
3233 // return
3234 return MatchResult( pContext, m_builder.m_nMaxNumber );
3235 }
3236 else
3237 {
3238 pContext->m_nCurrentPos += delta;
3239 }
3240 }
3241
3242 return 0;
3243 }
3244
PrepareMatch(const CHART * tstring,int start,CContext * pContext)3245 template <class CHART> inline CContext * CRegexpT <CHART> :: PrepareMatch(const CHART * tstring, int start, CContext * pContext) const
3246 {
3247 return PrepareMatch(tstring, CBufferRefT<CHART>(tstring).GetSize(), start, pContext);
3248 }
3249
PrepareMatch(const CHART * tstring,int length,int start,CContext * pContext)3250 template <class CHART> CContext * CRegexpT <CHART> :: PrepareMatch(const CHART * tstring, int length, int start, CContext * pContext) const
3251 {
3252 if(m_builder.m_pTopElx == 0)
3253 return 0;
3254
3255 if(pContext == 0) pContext = new CContext();
3256
3257 pContext->m_nParenZindex = 0;
3258 pContext->m_nLastBeginPos = -1;
3259 pContext->m_pMatchString = (void*)tstring;
3260 pContext->m_pMatchStringLength = length;
3261
3262 if(start < 0)
3263 {
3264 if(m_builder.m_nFlags & RIGHTTOLEFT)
3265 {
3266 pContext->m_nBeginPos = length;
3267 pContext->m_nCurrentPos = length;
3268 }
3269 else
3270 {
3271 pContext->m_nBeginPos = 0;
3272 pContext->m_nCurrentPos = 0;
3273 }
3274 }
3275 else
3276 {
3277 pContext->m_nBeginPos = start;
3278 pContext->m_nCurrentPos = start;
3279 }
3280
3281 return pContext;
3282 }
3283
GetNamedGroupNumber(const CHART * group_name)3284 template <class CHART> inline int CRegexpT <CHART> :: GetNamedGroupNumber(const CHART * group_name) const
3285 {
3286 return m_builder.GetNamedNumber(group_name);
3287 }
3288
Replace(const CHART * tstring,const CHART * replaceto,int start,int ntimes,MatchResult * result,CContext * pContext)3289 template <class CHART> CHART * CRegexpT <CHART> :: Replace(const CHART * tstring, const CHART * replaceto, int start, int ntimes, MatchResult * result, CContext * pContext) const
3290 {
3291 int result_length = 0;
3292 return Replace(tstring, CBufferRefT<CHART>(tstring).GetSize(), replaceto, CBufferRefT<CHART>(replaceto).GetSize(), result_length, start, ntimes, result, pContext);
3293 }
3294
Replace(const CHART * tstring,int string_length,const CHART * replaceto,int to_length,int & result_length,int start,int ntimes,MatchResult * remote_result,CContext * oContext)3295 template <class CHART> CHART * CRegexpT <CHART> :: Replace(const CHART * tstring, int string_length, const CHART * replaceto, int to_length, int & result_length, int start, int ntimes, MatchResult * remote_result, CContext * oContext) const
3296 {
3297 typedef CBufferRefT <CHART> StringRef;
3298
3299 MatchResult local_result(0), * result = remote_result ? remote_result : & local_result;
3300
3301 if(m_builder.m_pTopElx == 0) return 0;
3302
3303 // Prepare
3304 CContext * pContext = PrepareMatch(tstring, string_length, start, oContext);
3305
3306 int flags = m_builder.m_nFlags;
3307 int lastIndex = (flags & RIGHTTOLEFT) ? string_length : 0;
3308 int endpos = (flags & RIGHTTOLEFT) ? 0 : string_length;
3309 int toIndex = 0, toLastIndex = 0;
3310 int i, ntime;
3311
3312 CBufferT <StringRef *> buffer, buf;
3313
3314 static const CHART rtoptn[] = { RCHART('\\'), RCHART('$' ), RCHART('('), RCHART('?'), RCHART(':'), RCHART('[' ), RCHART('$' ), RCHART('&' ), RCHART('`' ), RCHART('\''), RCHART('+'), RCHART('_' ), RCHART('\\'), RCHART('d'), RCHART(']'), RCHART('|'), RCHART('\\'), RCHART('{'), RCHART('.'), RCHART('*'), RCHART('?'), RCHART('\\'), RCHART('}'), RCHART(')' ), RCHART('\0') };
3315 static int rtoptnlen = StringRef(rtoptn).GetSize();
3316 static CRegexpT <CHART> rtoreg(rtoptn, rtoptnlen, 0);
3317
3318 // Match
3319 for(ntime = 0; ntimes < 0 || ntime < ntimes; ntime ++)
3320 {
3321 (*result) = Match(pContext);
3322
3323 if( ! result->IsMatched() )
3324 break;
3325
3326 toIndex = toLastIndex;
3327
3328 // before
3329 if( flags & RIGHTTOLEFT )
3330 {
3331 int distance = lastIndex - result->GetEnd();
3332 if( distance )
3333 {
3334 buffer.Push(new StringRef(tstring + result->GetEnd(), distance));
3335 toIndex -= distance;
3336 }
3337 lastIndex = result->GetStart();
3338 }
3339 else
3340 {
3341 int distance = result->GetStart() - lastIndex;
3342 if( distance )
3343 {
3344 buffer.Push(new StringRef(tstring + lastIndex, distance));
3345 toIndex += distance;
3346 }
3347 lastIndex = result->GetEnd();
3348 }
3349
3350 toLastIndex = toIndex;
3351
3352 // middle
3353 CContext * pCtx = rtoreg.PrepareMatch(replaceto, to_length, -1);
3354 int lastI = 0;
3355
3356 buf.Restore(0);
3357
3358 while(1)
3359 {
3360 MatchResult res = rtoreg.Match(pCtx);
3361
3362 if( ! res.IsMatched() )
3363 break;
3364
3365 // before
3366 int distance = res.GetStart() - lastI;
3367 if( distance )
3368 {
3369 buf.Push(new StringRef(replaceto + lastI, distance));
3370 }
3371 lastI = res.GetStart();
3372
3373 // middle
3374 int delta = 2, nmatch = 0;
3375
3376 switch(replaceto[res.GetStart() + 1])
3377 {
3378 case RCHART('$'):
3379 buf.Push(new StringRef(rtoptn + 1, 1)); // '$' itself
3380 break;
3381
3382 case RCHART('&'):
3383 buf.Push(new StringRef(tstring + result->GetStart(), result->GetEnd() - result->GetStart()));
3384 break;
3385
3386 case RCHART('`'):
3387 buf.Push(new StringRef(tstring, result->GetStart()));
3388 break;
3389
3390 case RCHART('\''):
3391 buf.Push(new StringRef(tstring + result->GetEnd(), string_length - result->GetEnd()));
3392 break;
3393
3394 case RCHART('+'):
3395 for(nmatch = result->MaxGroupNumber(); nmatch >= 0; nmatch --)
3396 {
3397 if(result->GetGroupStart(nmatch) >= 0) break;
3398 }
3399 buf.Push(new StringRef(tstring + result->GetGroupStart(nmatch), result->GetGroupEnd(nmatch) - result->GetGroupStart(nmatch)));
3400 break;
3401
3402 case RCHART('_'):
3403 buf.Push(new StringRef(tstring, string_length));
3404 break;
3405
3406 case RCHART('{'):
3407 delta = res.GetEnd() - res.GetStart();
3408 nmatch = m_builder.GetNamedNumber(StringRef(replaceto + (res.GetStart() + 2), delta - 3));
3409
3410 if(nmatch > 0 && nmatch <= m_builder.m_nMaxNumber)
3411 buf.Push(new StringRef(tstring + result->GetGroupStart(nmatch), result->GetGroupEnd(nmatch) - result->GetGroupStart(nmatch)));
3412 else
3413 buf.Push(new StringRef(replaceto + res.GetStart(), delta));
3414 break;
3415
3416 default:
3417 nmatch = 0;
3418 for(delta=1; delta<=3; delta++)
3419 {
3420 CHART ch = replaceto[lastI + delta];
3421
3422 if(ch < RCHART('0') || ch > RCHART('9'))
3423 break;
3424
3425 nmatch = nmatch * 10 + (ch - RCHART('0'));
3426 }
3427
3428 if(nmatch > m_builder.m_nMaxNumber)
3429 {
3430 while(nmatch > m_builder.m_nMaxNumber)
3431 {
3432 nmatch /= 10;
3433 delta --;
3434 }
3435
3436 if(nmatch == 0)
3437 {
3438 delta = 1;
3439 }
3440 }
3441
3442 if(delta == 1)
3443 buf.Push(new StringRef(rtoptn + 1, 1)); // '$' itself
3444 else
3445 buf.Push(new StringRef(tstring + result->GetGroupStart(nmatch), result->GetGroupEnd(nmatch) - result->GetGroupStart(nmatch)));
3446 break;
3447 }
3448
3449 lastI += delta;
3450 }
3451
3452 // after
3453 if(lastI < to_length)
3454 buf.Push(new StringRef(replaceto + lastI, to_length - lastI));
3455
3456 // append to buffer
3457 if(flags & RIGHTTOLEFT)
3458 {
3459 for(i=buf.GetSize()-1; i>=0; i--)
3460 {
3461 buffer.Push(buf[i]);
3462 toLastIndex -= buf[i]->GetSize();
3463 }
3464 }
3465 else
3466 {
3467 for(i=0; i<buf.GetSize(); i++)
3468 {
3469 buffer.Push(buf[i]);
3470 toLastIndex += buf[i]->GetSize();
3471 }
3472 }
3473
3474 rtoreg.ReleaseContext(pCtx);
3475 }
3476
3477 // after
3478 if(flags & RIGHTTOLEFT)
3479 {
3480 if(endpos < lastIndex) buffer.Push(new StringRef(tstring + endpos, lastIndex - endpos));
3481 }
3482 else
3483 {
3484 if(lastIndex < endpos) buffer.Push(new StringRef(tstring + lastIndex, endpos - lastIndex));
3485 }
3486
3487 if(oContext == 0) ReleaseContext(pContext);
3488
3489 // join string
3490 result_length = 0;
3491 for(i=0; i<buffer.GetSize(); i++) result_length += buffer[i]->GetSize();
3492
3493 CBufferT <CHART> result_string;
3494 result_string.Prepare(result_length);
3495 result_string.Restore(0);
3496
3497 if(flags & RIGHTTOLEFT)
3498 {
3499 for(i=buffer.GetSize()-1; i>=0; i--)
3500 {
3501 result_string.Append(buffer[i]->GetBuffer(), buffer[i]->GetSize());
3502 delete buffer[i];
3503 }
3504 }
3505 else
3506 {
3507 for(i=0; i<buffer.GetSize(); i++)
3508 {
3509 result_string.Append(buffer[i]->GetBuffer(), buffer[i]->GetSize());
3510 delete buffer[i];
3511 }
3512 }
3513
3514 result_string[result_length] = 0;
3515
3516 result->m_result.Append(toIndex < toLastIndex ? toIndex : toLastIndex, 2);
3517 result->m_result.Append(toIndex > toLastIndex ? toIndex : toLastIndex);
3518 result->m_result.Append(ntime);
3519
3520 return result_string.Detach();
3521 }
3522
ReleaseString(CHART * tstring)3523 template <class CHART> inline void CRegexpT <CHART> :: ReleaseString(CHART * tstring)
3524 {
3525 if(tstring != 0) delete [] tstring;
3526 }
3527
ReleaseContext(CContext * pContext)3528 template <class CHART> inline void CRegexpT <CHART> :: ReleaseContext(CContext * pContext)
3529 {
3530 if(pContext != 0) delete pContext;
3531 }
3532
3533 //
3534 // All implementations
3535 //
CAlternativeElxT()3536 template <int x> CAlternativeElxT <x> :: CAlternativeElxT()
3537 {
3538 }
3539
Match(CContext * pContext)3540 template <int x> int CAlternativeElxT <x> :: Match(CContext * pContext) const
3541 {
3542 if(m_elxlist.GetSize() == 0)
3543 return 1;
3544
3545 // try all
3546 for(int n = 0; n < m_elxlist.GetSize(); n++)
3547 {
3548 if(m_elxlist[n]->Match(pContext))
3549 {
3550 pContext->m_stack.Push(n);
3551 return 1;
3552 }
3553 }
3554
3555 return 0;
3556 }
3557
MatchNext(CContext * pContext)3558 template <int x> int CAlternativeElxT <x> :: MatchNext(CContext * pContext) const
3559 {
3560 if(m_elxlist.GetSize() == 0)
3561 return 0;
3562
3563 int n = 0;
3564
3565 // recall prev
3566 pContext->m_stack.Pop(n);
3567
3568 // prev
3569 if(m_elxlist[n]->MatchNext(pContext))
3570 {
3571 pContext->m_stack.Push(n);
3572 return 1;
3573 }
3574 else
3575 {
3576 // try rest
3577 for(n++; n < m_elxlist.GetSize(); n++)
3578 {
3579 if(m_elxlist[n]->Match(pContext))
3580 {
3581 pContext->m_stack.Push(n);
3582 return 1;
3583 }
3584 }
3585
3586 return 0;
3587 }
3588 }
3589
3590 // assertx.cpp: implementation of the CAssertElx class.
3591 //
CAssertElxT(ElxInterface * pelx,int byes)3592 template <int x> CAssertElxT <x> :: CAssertElxT(ElxInterface * pelx, int byes)
3593 {
3594 m_pelx = pelx;
3595 m_byes = byes;
3596 }
3597
Match(CContext * pContext)3598 template <int x> int CAssertElxT <x> :: Match(CContext * pContext) const
3599 {
3600 int nbegin = pContext->m_nCurrentPos;
3601 int nsize = pContext->m_stack.GetSize();
3602 int ncsize = pContext->m_capturestack.GetSize();
3603 int bsucc;
3604
3605 // match
3606 if( m_byes )
3607 bsucc = m_pelx->Match(pContext);
3608 else
3609 bsucc = ! m_pelx->Match(pContext);
3610
3611 // status
3612 pContext->m_stack.Restore(nsize);
3613 pContext->m_nCurrentPos = nbegin;
3614
3615 if( bsucc )
3616 pContext->m_stack.Push(ncsize);
3617 else
3618 pContext->m_capturestack.Restore(ncsize);
3619
3620 return bsucc;
3621 }
3622
MatchNext(CContext * pContext)3623 template <int x> int CAssertElxT <x> :: MatchNext(CContext * pContext) const
3624 {
3625 int ncsize = 0;
3626
3627 pContext->m_stack.Pop(ncsize);
3628 pContext->m_capturestack.Restore(ncsize);
3629
3630 return 0;
3631 }
3632
3633 // emptyelx.cpp: implementation of the CEmptyElx class.
3634 //
CEmptyElxT()3635 template <int x> CEmptyElxT <x> :: CEmptyElxT()
3636 {
3637 }
3638
Match(CContext *)3639 template <int x> int CEmptyElxT <x> :: Match(CContext *) const
3640 {
3641 return 1;
3642 }
3643
MatchNext(CContext *)3644 template <int x> int CEmptyElxT <x> :: MatchNext(CContext *) const
3645 {
3646 return 0;
3647 }
3648
3649 // globalx.cpp: implementation of the CGlobalElx class.
3650 //
CGlobalElxT()3651 template <int x> CGlobalElxT <x> ::CGlobalElxT()
3652 {
3653 }
3654
Match(CContext * pContext)3655 template <int x> int CGlobalElxT <x> :: Match(CContext * pContext) const
3656 {
3657 return pContext->m_nCurrentPos == pContext->m_nBeginPos;
3658 }
3659
MatchNext(CContext *)3660 template <int x> int CGlobalElxT <x> :: MatchNext(CContext *) const
3661 {
3662 return 0;
3663 }
3664
3665 // greedelx.cpp: implementation of the CGreedyElx class.
3666 //
CGreedyElxT(ElxInterface * pelx,int nmin,int nmax)3667 template <int x> CGreedyElxT <x> :: CGreedyElxT(ElxInterface * pelx, int nmin, int nmax) : CRepeatElxT <x> (pelx, nmin)
3668 {
3669 m_nvart = nmax - nmin;
3670 }
3671
Match(CContext * pContext)3672 template <int x> int CGreedyElxT <x> :: Match(CContext * pContext) const
3673 {
3674 if( ! CRepeatElxT <x> :: MatchFixed(pContext) )
3675 return 0;
3676
3677 while( ! MatchVart(pContext) )
3678 {
3679 if( ! CRepeatElxT <x> :: MatchNextFixed(pContext) )
3680 return 0;
3681 }
3682
3683 return 1;
3684 }
3685
MatchNext(CContext * pContext)3686 template <int x> int CGreedyElxT <x> :: MatchNext(CContext * pContext) const
3687 {
3688 if( MatchNextVart(pContext) )
3689 return 1;
3690
3691 if( ! CRepeatElxT <x> :: MatchNextFixed(pContext) )
3692 return 0;
3693
3694 while( ! MatchVart(pContext) )
3695 {
3696 if( ! CRepeatElxT <x> :: MatchNextFixed(pContext) )
3697 return 0;
3698 }
3699
3700 return 1;
3701 }
3702
MatchVart(CContext * pContext)3703 template <int x> int CGreedyElxT <x> :: MatchVart(CContext * pContext) const
3704 {
3705 int n = 0;
3706 int nbegin = pContext->m_nCurrentPos;
3707
3708 while(n < m_nvart && CRepeatElxT <x> :: m_pelx->Match(pContext))
3709 {
3710 while(pContext->m_nCurrentPos == nbegin)
3711 {
3712 if( ! CRepeatElxT <x> :: m_pelx->MatchNext(pContext) ) break;
3713 }
3714
3715 if(pContext->m_nCurrentPos == nbegin) break;
3716
3717 n ++;
3718 nbegin = pContext->m_nCurrentPos;
3719 }
3720
3721 pContext->m_stack.Push(n);
3722
3723 return 1;
3724 }
3725
MatchNextVart(CContext * pContext)3726 template <int x> int CGreedyElxT <x> :: MatchNextVart(CContext * pContext) const
3727 {
3728 int n = 0;
3729 pContext->m_stack.Pop(n);
3730
3731 if(n == 0) return 0;
3732
3733 if( ! CRepeatElxT <x> :: m_pelx->MatchNext(pContext) )
3734 {
3735 n --;
3736 }
3737
3738 pContext->m_stack.Push(n);
3739
3740 return 1;
3741 }
3742
// indepelx.cpp: implementation of the CIndependentElxT class.
//
CIndependentElxT(ElxInterface * pelx)3745 template <int x> CIndependentElxT <x> :: CIndependentElxT(ElxInterface * pelx)
3746 {
3747 m_pelx = pelx;
3748 }
3749
Match(CContext * pContext)3750 template <int x> int CIndependentElxT <x> :: Match(CContext * pContext) const
3751 {
3752 int nbegin = pContext->m_nCurrentPos;
3753 int nsize = pContext->m_stack.GetSize();
3754 int ncsize = pContext->m_capturestack.GetSize();
3755
3756 // match
3757 int bsucc = m_pelx->Match(pContext);
3758
3759 // status
3760 pContext->m_stack.Restore(nsize);
3761
3762 if( bsucc )
3763 {
3764 pContext->m_stack.Push(nbegin);
3765 pContext->m_stack.Push(ncsize);
3766 }
3767
3768 return bsucc;
3769 }
3770
MatchNext(CContext * pContext)3771 template <int x> int CIndependentElxT <x> :: MatchNext(CContext * pContext) const
3772 {
3773 int nbegin = 0, ncsize = 0;
3774
3775 pContext->m_stack.Pop(ncsize);
3776 pContext->m_stack.Pop(nbegin);
3777
3778 pContext->m_capturestack.Restore(ncsize);
3779 pContext->m_nCurrentPos = nbegin;
3780
3781 return 0;
3782 }
3783
// listelx.cpp: implementation of the CListElxT class.
//
CListElxT(int brightleft)3786 template <int x> CListElxT <x> :: CListElxT(int brightleft)
3787 {
3788 m_brightleft = brightleft;
3789 }
3790
Match(CContext * pContext)3791 template <int x> int CListElxT <x> :: Match(CContext * pContext) const
3792 {
3793 if(m_elxlist.GetSize() == 0)
3794 return 1;
3795
3796 // prepare
3797 int bol = m_brightleft ? m_elxlist.GetSize() : -1;
3798 int stp = m_brightleft ? -1 : 1;
3799 int eol = m_brightleft ? -1 : m_elxlist.GetSize();
3800
3801 // from first
3802 int n = bol + stp;
3803
3804 // match all
3805 while(n != eol)
3806 {
3807 if(m_elxlist[n]->Match(pContext))
3808 {
3809 n += stp;
3810 }
3811 else
3812 {
3813 n -= stp;
3814
3815 while(n != bol && ! m_elxlist[n]->MatchNext(pContext))
3816 n -= stp;
3817
3818 if(n != bol)
3819 n += stp;
3820 else
3821 return 0;
3822 }
3823 }
3824
3825 return 1;
3826 }
3827
MatchNext(CContext * pContext)3828 template <int x> int CListElxT <x> :: MatchNext(CContext * pContext) const
3829 {
3830 if(m_elxlist.GetSize() == 0)
3831 return 0;
3832
3833 // prepare
3834 int bol = m_brightleft ? m_elxlist.GetSize() : -1;
3835 int stp = m_brightleft ? -1 : 1;
3836 int eol = m_brightleft ? -1 : m_elxlist.GetSize();
3837
3838 // from last
3839 int n = eol - stp;
3840
3841 while(n != bol && ! m_elxlist[n]->MatchNext(pContext))
3842 n -= stp;
3843
3844 if(n != bol)
3845 n += stp;
3846 else
3847 return 0;
3848
3849 // match rest
3850 while(n != eol)
3851 {
3852 if(m_elxlist[n]->Match(pContext))
3853 {
3854 n += stp;
3855 }
3856 else
3857 {
3858 n -= stp;
3859
3860 while(n != bol && ! m_elxlist[n]->MatchNext(pContext))
3861 n -= stp;
3862
3863 if(n != bol)
3864 n += stp;
3865 else
3866 return 0;
3867 }
3868 }
3869
3870 return 1;
3871 }
3872
// mresult.cpp: implementation of the MatchResultT class.
//
MatchResultT(CContext * pContext,int nMaxNumber)3875 template <int x> MatchResultT <x> :: MatchResultT(CContext * pContext, int nMaxNumber)
3876 {
3877 if(pContext != 0)
3878 {
3879 m_result.Prepare(nMaxNumber * 2 + 3, -1);
3880
3881 // matched
3882 m_result[0] = 1;
3883 m_result[1] = nMaxNumber;
3884
3885 for(int n = 0; n <= nMaxNumber; n++)
3886 {
3887 int index = pContext->m_captureindex[n];
3888 if( index < 0 ) continue;
3889
3890 // check enclosed
3891 int pos1 = pContext->m_capturestack[index + 1];
3892 int pos2 = pContext->m_capturestack[index + 2];
3893
3894 // info
3895 m_result[n*2 + 2] = pos1 < pos2 ? pos1 : pos2;
3896 m_result[n*2 + 3] = pos1 < pos2 ? pos2 : pos1;
3897 }
3898 }
3899 }
3900
IsMatched()3901 template <int x> inline int MatchResultT <x> :: IsMatched() const
3902 {
3903 return m_result.At(0, 0);
3904 }
3905
MaxGroupNumber()3906 template <int x> inline int MatchResultT <x> :: MaxGroupNumber() const
3907 {
3908 return m_result.At(1, 0);
3909 }
3910
GetStart()3911 template <int x> inline int MatchResultT <x> :: GetStart() const
3912 {
3913 return m_result.At(2, -1);
3914 }
3915
GetEnd()3916 template <int x> inline int MatchResultT <x> :: GetEnd() const
3917 {
3918 return m_result.At(3, -1);
3919 }
3920
GetGroupStart(int nGroupNumber)3921 template <int x> inline int MatchResultT <x> :: GetGroupStart(int nGroupNumber) const
3922 {
3923 return m_result.At(2 + nGroupNumber * 2, -1);
3924 }
3925
GetGroupEnd(int nGroupNumber)3926 template <int x> inline int MatchResultT <x> :: GetGroupEnd(int nGroupNumber) const
3927 {
3928 return m_result.At(2 + nGroupNumber * 2 + 1, -1);
3929 }
3930
3931 template <int x> MatchResultT <x> & MatchResultT <x> :: operator = (const MatchResultT <x> & result)
3932 {
3933 m_result.Restore(0);
3934 if(result.m_result.GetSize() > 0) m_result.Append(result.m_result.GetBuffer(), result.m_result.GetSize());
3935
3936 return *this;
3937 }
3938
// posselx.cpp: implementation of the CPossessiveElxT class.
//
CPossessiveElxT(ElxInterface * pelx,int nmin,int nmax)3941 template <int x> CPossessiveElxT <x> :: CPossessiveElxT(ElxInterface * pelx, int nmin, int nmax) : CGreedyElxT <x> (pelx, nmin, nmax)
3942 {
3943 }
3944
Match(CContext * pContext)3945 template <int x> int CPossessiveElxT <x> :: Match(CContext * pContext) const
3946 {
3947 int nbegin = pContext->m_nCurrentPos;
3948 int nsize = pContext->m_stack.GetSize();
3949 int ncsize = pContext->m_capturestack.GetSize();
3950 int bsucc = 1;
3951
3952 // match
3953 if( ! CRepeatElxT <x> :: MatchFixed(pContext) )
3954 {
3955 bsucc = 0;
3956 }
3957 else
3958 {
3959 while( ! CGreedyElxT <x> :: MatchVart(pContext) )
3960 {
3961 if( ! CRepeatElxT <x> :: MatchNextFixed(pContext) )
3962 {
3963 bsucc = 0;
3964 break;
3965 }
3966 }
3967 }
3968
3969 // status
3970 pContext->m_stack.Restore(nsize);
3971
3972 if( bsucc )
3973 {
3974 pContext->m_stack.Push(nbegin);
3975 pContext->m_stack.Push(ncsize);
3976 }
3977
3978 return bsucc;
3979 }
3980
MatchNext(CContext * pContext)3981 template <int x> int CPossessiveElxT <x> :: MatchNext(CContext * pContext) const
3982 {
3983 int nbegin = 0, ncsize = 0;
3984
3985 pContext->m_stack.Pop(ncsize);
3986 pContext->m_stack.Pop(nbegin);
3987
3988 pContext->m_capturestack.Restore(ncsize);
3989 pContext->m_nCurrentPos = nbegin;
3990
3991 return 0;
3992 }
3993
// reluctx.cpp: implementation of the CReluctantElxT class.
//
CReluctantElxT(ElxInterface * pelx,int nmin,int nmax)3996 template <int x> CReluctantElxT <x> :: CReluctantElxT(ElxInterface * pelx, int nmin, int nmax) : CRepeatElxT <x> (pelx, nmin)
3997 {
3998 m_nvart = nmax - nmin;
3999 }
4000
Match(CContext * pContext)4001 template <int x> int CReluctantElxT <x> :: Match(CContext * pContext) const
4002 {
4003 if( ! CRepeatElxT <x> :: MatchFixed(pContext) )
4004 return 0;
4005
4006 while( ! MatchVart(pContext) )
4007 {
4008 if( ! CRepeatElxT <x> :: MatchNextFixed(pContext) )
4009 return 0;
4010 }
4011
4012 return 1;
4013 }
4014
MatchNext(CContext * pContext)4015 template <int x> int CReluctantElxT <x> :: MatchNext(CContext * pContext) const
4016 {
4017 if( MatchNextVart(pContext) )
4018 return 1;
4019
4020 if( ! CRepeatElxT <x> :: MatchNextFixed(pContext) )
4021 return 0;
4022
4023 while( ! MatchVart(pContext) )
4024 {
4025 if( ! CRepeatElxT <x> :: MatchNextFixed(pContext) )
4026 return 0;
4027 }
4028
4029 return 1;
4030 }
4031
MatchVart(CContext * pContext)4032 template <int x> int CReluctantElxT <x> :: MatchVart(CContext * pContext) const
4033 {
4034 pContext->m_stack.Push(0);
4035
4036 return 1;
4037 }
4038
MatchNextVart(CContext * pContext)4039 template <int x> int CReluctantElxT <x> :: MatchNextVart(CContext * pContext) const
4040 {
4041 int n = 0, nbegin = pContext->m_nCurrentPos;
4042
4043 pContext->m_stack.Pop(n);
4044
4045 if(n < m_nvart && CRepeatElxT <x> :: m_pelx->Match(pContext))
4046 {
4047 while(pContext->m_nCurrentPos == nbegin)
4048 {
4049 if( ! CRepeatElxT <x> :: m_pelx->MatchNext(pContext) ) break;
4050 }
4051
4052 if(pContext->m_nCurrentPos != nbegin)
4053 {
4054 n ++;
4055
4056 pContext->m_stack.Push(nbegin);
4057 pContext->m_stack.Push(n);
4058
4059 return 1;
4060 }
4061 }
4062
4063 while(n > 0)
4064 {
4065 pContext->m_stack.Pop(nbegin);
4066
4067 while( CRepeatElxT <x> :: m_pelx->MatchNext(pContext) )
4068 {
4069 if(pContext->m_nCurrentPos != nbegin)
4070 {
4071 pContext->m_stack.Push(nbegin);
4072 pContext->m_stack.Push(n);
4073
4074 return 1;
4075 }
4076 }
4077
4078 n --;
4079 }
4080
4081 return 0;
4082 }
4083
// repeatx.cpp: implementation of the CRepeatElxT class.
//
CRepeatElxT(ElxInterface * pelx,int ntimes)4086 template <int x> CRepeatElxT <x> :: CRepeatElxT(ElxInterface * pelx, int ntimes)
4087 {
4088 m_pelx = pelx;
4089 m_nfixed = ntimes;
4090 }
4091
Match(CContext * pContext)4092 template <int x> int CRepeatElxT <x> :: Match(CContext * pContext) const
4093 {
4094 return MatchFixed(pContext);
4095 }
4096
MatchNext(CContext * pContext)4097 template <int x> int CRepeatElxT <x> :: MatchNext(CContext * pContext) const
4098 {
4099 return MatchNextFixed(pContext);
4100 }
4101
MatchFixed(CContext * pContext)4102 template <int x> int CRepeatElxT <x> :: MatchFixed(CContext * pContext) const
4103 {
4104 if(m_nfixed == 0)
4105 return 1;
4106
4107 int n = 0;
4108
4109 while(n < m_nfixed)
4110 {
4111 if(m_pelx->Match(pContext))
4112 {
4113 n ++;
4114 }
4115 else
4116 {
4117 n --;
4118
4119 while(n >= 0 && ! m_pelx->MatchNext(pContext))
4120 n --;
4121
4122 if(n >= 0)
4123 n ++;
4124 else
4125 return 0;
4126 }
4127 }
4128
4129 return 1;
4130 }
4131
MatchNextFixed(CContext * pContext)4132 template <int x> int CRepeatElxT <x> :: MatchNextFixed(CContext * pContext) const
4133 {
4134 if(m_nfixed == 0)
4135 return 0;
4136
4137 // from last
4138 int n = m_nfixed - 1;
4139
4140 while(n >= 0 && ! m_pelx->MatchNext(pContext))
4141 n --;
4142
4143 if(n >= 0)
4144 n ++;
4145 else
4146 return 0;
4147
4148 // match rest
4149 while(n < m_nfixed)
4150 {
4151 if(m_pelx->Match(pContext))
4152 {
4153 n ++;
4154 }
4155 else
4156 {
4157 n --;
4158
4159 while(n >= 0 && ! m_pelx->MatchNext(pContext))
4160 n --;
4161
4162 if(n >= 0)
4163 n ++;
4164 else
4165 return 0;
4166 }
4167 }
4168
4169 return 1;
4170 }
4171
4172 // Regexp
4173 typedef CRegexpT <char> CRegexpA;
4174 typedef CRegexpT <unsigned short> CRegexpW;
4175
4176 #if defined(_UNICODE) || defined(UNICODE)
4177 typedef CRegexpW CRegexp;
4178 #else
4179 typedef CRegexpA CRegexp;
4180 #endif
4181
4182 #endif//__DEELX_REGEXP__H__
4183