1 /*
2 * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
3 * Copyright (C) INRIA
4 * Copyright (C) DIGITEO - 2009
5 *
6  * Copyright (C) 2012 - 2016 - Scilab Enterprises
7  *
8  * This file is hereby licensed under the terms of the GNU GPL v2.0,
9  * pursuant to article 5.3.4 of the CeCILL v.2.1.
10  * This file was originally licensed under the terms of the CeCILL v2.1,
11  * and continues to be available under such terms.
12  * For more information, see the COPYING file which you should have received
13  * along with this program.
14 *
15 */
16 
17 /*-------------------------------------------------------------------------------*/
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <pcre.h>
#include "sci_malloc.h"
#include "BOOL.h"
#include "pcre_private.h"
#include "os_string.h"
#include "strsubst.h"
#include "configvariable_interface.h"
#include "sciprint.h"
#include "charEncoding.h"
#include "freeArrayOfString.h"
35 /*-------------------------------------------------------------------------------*/
36 /* A number of things vary for Windows builds. Originally, pcretest opened its
37 input and output without "b"; then I was told that "b" was needed in some
38 environments, so it was added for release 5.0 to both the input and output. (It
39 makes no difference on Unix-like systems.) Later I was told that it is wrong
40 for the input on Windows. I've now abstracted the modes into two macros that
41 are set here, to make it easier to fiddle with them, and removed "b" from the
42 input mode under Windows. */
43 /*-------------------------------------------------------------------------------*/
44 #if _MSC_VER
45 #include <io.h>                /* For _setmode() */
46 #include <fcntl.h>             /* For _O_BINARY */
47 #define INPUT_MODE   "r"
48 #define OUTPUT_MODE  "wb"
49 #else
50 #include <sys/time.h>          /* These two includes are needed */
51 #include <sys/resource.h>      /* for setrlimit(). */
52 #define INPUT_MODE   "rb"
53 #define OUTPUT_MODE  "wb"
54 #endif
55 
56 #define LINK_SIZE		2
57 
58 /* We have to include pcre_internal.h because we need the internal info for
59 displaying the results of pcre_study() and we also need to know about the
60 internal macros, structures, and other internal data values; pcretest has
61 "inside information" compared to a program that strictly follows the PCRE API.
62 
63 Although pcre_internal.h does itself include pcre.h, we explicitly include it
64 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
65 appropriately for an application, not for building PCRE. */
66 
67 #include <pcre.h>
68 #include "pcre_internal.h"
69 
70 /* We need access to the data tables that PCRE uses. So as not to have to keep
71 two copies, we include the source file here, changing the names of the external
72 symbols to prevent clashes. */
73 
74 #define _pcre_utf8_table1      utf8_table1
75 #define _pcre_utf8_table1_size utf8_table1_size
76 #define _pcre_utf8_table2      utf8_table2
77 #define _pcre_utf8_table3      utf8_table3
78 #define _pcre_utf8_table4      utf8_table4
79 #define _pcre_utt              utt
80 #define _pcre_utt_size         utt_size
81 #define _pcre_utt_names        utt_names
82 #define _pcre_OP_lengths       OP_lengths
83 
84 #include "pcre_tables.c"
85 
86 /* It is possible to compile this test program without including support for
87 testing the POSIX interface, though this is not available via the standard
88 Makefile. */
89 
90 #if !defined NOPOSIX
91 #include "pcreposix.h"
92 #endif
93 
94 /* It is also possible, for the benefit of the version currently imported into
95 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
96 interface to the DFA matcher (NODFA), and without the doublecheck of the old
97 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
98 UTF8 support if PCRE is built without it. */
99 
100 #ifndef SUPPORT_UTF8
101 #ifndef NOUTF8
102 #define NOUTF8
103 #endif
104 #endif
105 /*-------------------------------------------------------------------------------*/
106 /* Static variables */
107 
/* Callout bookkeeping. pcre_private() resets these before every match
   (count = 0, fail_count = 999999, fail_id = -1); the "\C!" data escape may
   then overwrite the two "fail" values. */
static int callout_count      = 0;
static int callout_fail_count = 0;
static int callout_fail_id    = 0;

/* Working copy of the subject string. pcre_private() assigns it the result of
   strsub(INPUT_LINE, "\\", "\\\\") and releases it on its error paths. The
   symbol is not static, so it has external linkage. */
char *buffer = NULL;
115 
116 
117 static int check_match_limit(pcre *re, pcre_extra *extra, char *bptr, int len,
118                              int start_offset, int options, int *use_offsets, int use_size_offsets,
119                              int flag, unsigned long int *limit, int errnumber);
120 
121 
122 /*************************************************
123 *        Check match or recursion limit          *
124 *************************************************/
125 
check_match_limit(pcre * re,pcre_extra * extra,char * bptr,int len,int start_offset,int options,int * use_offsets,int use_size_offsets,int flag,unsigned long int * limit,int errnumber)126 static int check_match_limit(pcre *re, pcre_extra *extra, char *bptr, int len,
127                              int start_offset, int options, int *use_offsets, int use_size_offsets,
128                              int flag, unsigned long int *limit, int errnumber)
129 {
130     int count;
131     int min = 0;
132     int mid = 64;
133     int max = -1;
134 
135     extra->flags |= flag;
136 
137     for (;;)
138     {
139         *limit = mid;
140 
141         count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
142                           use_offsets, use_size_offsets);
143 
144         if (count == errnumber)
145         {
146             min = mid;
147             mid = (mid == max - 1) ? max : (max > 0) ? (min + max) / 2 : mid * 2;
148         }
149 
150         else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
151                  count == PCRE_ERROR_PARTIAL)
152         {
153             if (mid == min + 1)
154             {
155                 break;
156             }
157             max = mid;
158             mid = (min + mid) / 2;
159         }
160         else
161         {
162             break;    /* Some other error */
163         }
164     }
165 
166     extra->flags &= ~flag;
167     return count;
168 }
169 
170 
171 /*************************************************
172 *               Algorithm                      *
173 *************************************************/
174 
/* Match the subject string INPUT_LINE against the pattern INPUT_PAT. INPUT_PAT
consists of a regular expression in delimiters, optionally followed by options;
INPUT_LINE takes the place of pcretest's test data line. (In pcretest, from
which this code derives, both were read from a named file or stdin.) */
178 
pcre_private(const char * INPUT_LINE,const char * INPUT_PAT,int * Output_Start,int * Output_End,char *** _pstCapturedString,int * _piCapturedStringCount)179 pcre_error_code pcre_private(const char *INPUT_LINE, const char *INPUT_PAT, int *Output_Start, int *Output_End, char*** _pstCapturedString, int* _piCapturedStringCount)
180 {
181     /* ALL strings are managed as UTF-8 by default */
182     int options = PCRE_UTF8;
183     int size_offsets = 45;
184     int size_offsets_max;
185     int *offsets = NULL;
186     int all_use_dfa = 0;
187     BOOL LOOP_PCRE_TST = FALSE;
188 
189     /* These vectors store, end-to-end, a list of captured substring names. Assume
190     that 1024 is plenty long enough for the few names we'll be testing. */
191 
192     char copynames[1024];
193     char getnames[1024];
194 
195     char *copynamesptr = NULL;
196     char *getnamesptr = NULL;
197 
198     int rc = 0;
199     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
200     if (rc != 1)
201     {
202         return UTF8_NOT_SUPPORTED;
203     }
204 
205     /* bug 3891 */
206     /* backslash characters are not interpreted for input */
207     buffer = strsub(INPUT_LINE, "\\", "\\\\");
208 
209     size_offsets_max = size_offsets;
210     offsets = (int *)MALLOC(size_offsets_max * sizeof(int));
211     if (offsets == NULL)
212     {
213         if (buffer)
214         {
215             FREE(buffer);
216             buffer = NULL;
217         }
218         return NOT_ENOUGH_MEMORY_FOR_VECTOR;
219     }
220     /* Main loop */
221     LOOP_PCRE_TST = FALSE;
222     while (!LOOP_PCRE_TST)
223     {
224         pcre *re = NULL;
225         pcre_extra *extra = NULL;
226         const char *error = NULL;
227         char *back_p = NULL;
228         char *p = NULL;
229         char *pp = NULL;
230         char *ppp = NULL;
231         const unsigned char *tables = NULL;
232         int do_G = 0;
233         int do_g = 0;
234         int erroroffset = 0, len = 0, delimiter;
235 
236         LOOP_PCRE_TST = TRUE;
237         p = os_strdup(INPUT_PAT);
238         back_p = p;
239         while (isspace(*p))
240         {
241             p++;
242         }
243         if (*p == 0)
244         {
245             FREE(back_p);
246             continue;
247         }
        /* In-line pattern (the usual case). Get the delimiter and seek the end of
        the pattern; if it isn't complete, the pattern is rejected below. */
250 
251         delimiter = *p++;
252 
253         if (isalnum(delimiter) || delimiter == '\\')
254         {
255             if (buffer)
256             {
257                 FREE(buffer);
258                 buffer = NULL;
259             }
260             if (offsets)
261             {
262                 FREE(offsets);
263                 offsets = NULL;
264             }
265             if (back_p)
266             {
267                 FREE(back_p);
268                 back_p = NULL;
269             }
270             return DELIMITER_NOT_ALPHANUMERIC;
271         }
272 
273         pp = p;
274 
275         while (*pp != 0)
276         {
277             if (*pp == '\\' && pp[1] != 0)
278             {
279                 pp++;
280             }
281             else if (*pp == delimiter)
282             {
283                 break;
284             }
285             pp++;
286         }
287 
288         /* If the delimiter can't be found, it's a syntax error */
289         if (*pp == 0)
290         {
291             if (buffer)
292             {
293                 FREE(buffer);
294                 buffer = NULL;
295             }
296             if (offsets)
297             {
298                 FREE(offsets);
299                 offsets = NULL;
300             }
301             if (back_p)
302             {
303                 FREE(back_p);
304                 back_p = NULL;
305             }
306             if (offsets)
307             {
308                 FREE(offsets);
309             }
310             return CAN_NOT_COMPILE_PATTERN;
311         }
312 
313         /* If the first character after the delimiter is backslash, make
314         the pattern end with backslash. This is purely to provide a way
315         of testing for the error message when a pattern ends with backslash. */
316 
317         if (pp[1] == '\\')
318         {
319             *pp++ = '\\';
320         }
321 
322         /* Terminate the pattern at the delimiter, and save a copy of the pattern
323         for callouts. */
324 
325         *pp++ = 0;
326 
327         /* Look for options after final delimiter */
328 
329         //options = 8192;
330 
331         while (*pp != 0)
332         {
333             switch (*pp++)
334             {
335                 case 'f':
336                     options |= PCRE_FIRSTLINE;
337                     break;
338                 case 'g':
339                     do_g = 1;
340                     break;
341                 case 'i':
342                     options |= PCRE_CASELESS;
343                     break;
344                 case 'm':
345                     options |= PCRE_MULTILINE;
346                     break;
347                 case 's':
348                     options |= PCRE_DOTALL;
349                     break;
350                 case 'x':
351                     options |= PCRE_EXTENDED;
352                     break;
353                 case '+':
354                     break;
355                 case 'A':
356                     options |= PCRE_ANCHORED;
357                     break;
358                 case 'B':
359                     break;
360                 case 'C':
361                     options |= PCRE_AUTO_CALLOUT;
362                     break;
363                 case 'D':
364                     break;
365                 case 'E':
366                     options |= PCRE_DOLLAR_ENDONLY;
367                     break;
368                 case 'F':
369                     break;
370                 case 'G':
371                     do_G = 1;
372                     break;
373                 case 'I':
374                     break;
375                 case 'J':
376                     options |= PCRE_DUPNAMES;
377                     break;
378                 case 'M':
379                     break;
380                 case 'N':
381                     options |= PCRE_NO_AUTO_CAPTURE;
382                     break;
383                 case 'S':
384                     break;
385                 case 'U':
386                     options |= PCRE_UNGREEDY;
387                     break;
388                 case 'X':
389                     options |= PCRE_EXTRA;
390                     break;
391                 case 'Z':
392                     break;
393                 case '8':
394                 {
395                     int rc = 0;
396                     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
397                     if (rc != 1)
398                     {
399                         if (buffer)
400                         {
401                             FREE(buffer);
402                             buffer = NULL;
403                         }
404                         if (back_p)
405                         {
406                             FREE(back_p);
407                             back_p = NULL;
408                         }
409                         if (offsets)
410                         {
411                             FREE(offsets);
412                         }
413                         return UTF8_NOT_SUPPORTED;
414                     }
415                     options |= PCRE_UTF8;
416                 }
417                 break;
418                 case '?':
419                     options |= PCRE_NO_UTF8_CHECK;
420                     break;
421                 case 'L':
422                     ppp = pp;
423                     /* The '\r' test here is so that it works on Windows. */
424                     /* The '0' test is just in case this is an unterminated line. */
425                     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ')
426                     {
427                         ppp++;
428                     }
429                     *ppp = 0;
430                     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
431                     {
432                         goto SKIP_DATA;
433                     }
434 
435                     tables = pcre_maketables();
436                     pp = ppp;
437                     break;
438                 case '>':
439                     while (*pp != 0)
440                     {
441                         pp++;
442                     }
443                     while (isspace(pp[-1]))
444                     {
445                         pp--;
446                     }
447                     *pp = 0;
448                     break;
449                 case '<':
450                 {
451                     while (*pp++ != '>')
452                     {
453                         ;
454                     }
455                 }
456                 break;
457                 case '\r':                      /* So that it works in Windows */
458                 case '\n':
459                 case ' ':
460                     break;
461 
462                 default:
463                     goto SKIP_DATA;
464             }
465         }
466 
467         /* Handle compiling via the POSIX interface, which doesn't support the
468         timing, showing, or debugging options, nor the ability to pass over
469         local character tables. */
470 
471 
472         {
473             re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
474             /* Compilation failed; go back for another re, skipping to blank line
475             if non-interactive. */
476             if (re == NULL)
477             {
478 SKIP_DATA:
479                 if (buffer)
480                 {
481                     FREE(buffer);
482                     buffer = NULL;
483                 }
484                 if (offsets)
485                 {
486                     FREE(offsets);
487                     offsets = NULL;
488                 }
489                 if (tables)
490                 {
491                     (*pcre_free)((void*)tables);
492                     tables = NULL;
493                 }
494                 if (extra)
495                 {
496                     FREE(extra);
497                     extra = NULL;
498                 }
499                 if (back_p)
500                 {
501                     FREE(back_p);
502                     back_p = NULL;
503                 }
504                 return CAN_NOT_COMPILE_PATTERN;
505             }
506 
507         }        /* End of non-POSIX compile */
508 
509         /* Read data lines and test them */
510         {
511             char *q = NULL;
512             char *bptr = NULL;
513             int *use_offsets = offsets;
514             int use_size_offsets = size_offsets;
515             int callout_data = 0;
516             int callout_data_set = 0;
517             int count = 0;
518             int c = 0;
519             int copystrings = 0;
520             int find_match_limit = 0;
521             int getstrings = 0;
522             int gmatched = 0;
523             int start_offset = 0;
524             int g_notempty = 0;
525             int use_dfa = 0;
526 
527             options = 0;
528             *copynames = 0;
529             *getnames = 0;
530 
531             copynamesptr = copynames;
532             getnamesptr = getnames;
533 
534             callout_count = 0;
535             callout_fail_count = 999999;
536             callout_fail_id = -1;
537 
538             if (extra != NULL)
539             {
540                 extra->flags &= ~(PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION);
541             }
542             p = buffer;
543             bptr = q = buffer;
544             while ((c = *p++) != 0)
545             {
546                 int i = 0;
547                 int n = 0;
548 
549                 if (c == '\\') switch ((c = *p++))
550                     {
551                         case 'a':
552                             c =    7;
553                             break;
554                         case 'b':
555                             c = '\b';
556                             break;
557                         case 'e':
558                             c =   27;
559                             break;
560                         case 'f':
561                             c = '\f';
562                             break;
563                         case 'n':
564                             c = '\n';
565                             break;
566                         case 'r':
567                             c = '\r';
568                             break;
569                         case 't':
570                             c = '\t';
571                             break;
572                         case 'v':
573                             c = '\v';
574                             break;
575                         case '0':
576                         case '1':
577                         case '2':
578                         case '3':
579                         case '4':
580                         case '5':
581                         case '6':
582                         case '7':
583                             c -= '0';
584                             while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
585                             {
586                                 c = c * 8 + *p++ - '0';
587                             }
588                             break;
589                         case 'x':
590                             /* Ordinary \x */
591                             c = 0;
592                             while (i++ < 2 && isxdigit(*p))
593                             {
594                                 c = c * 16 + tolower(*p) - ((isdigit(*p)) ? '0' : 'W');
595                                 p++;
596                             }
597                             break;
598                         case 0:   /* \ followed by EOF allows for an empty line */
599                             p--;
600                             continue;
601                         case '>':
602                             while (isdigit(*p))
603                             {
604                                 start_offset = start_offset * 10 + *p++ - '0';
605                             }
606                             continue;
607                         case 'A':  /* Option setting */
608                             options |= PCRE_ANCHORED;
609                             continue;
610                         case 'B':
611                             options |= PCRE_NOTBOL;
612                             continue;
613                         case 'C':
614                             if (isdigit(*p))    /* Set copy string */
615                             {
616                                 while (isdigit(*p))
617                                 {
618                                     n = n * 10 + *p++ - '0';
619                                 }
620                                 copystrings |= 1 << n;
621                             }
622                             else if (isalnum(*p))
623                             {
624                                 char *npp = copynamesptr;
625                                 while (isalnum(*p))
626                                 {
627                                     *npp++ = *p++;
628                                 }
629                                 *npp++ = 0;
630                                 *npp = 0;
631                                 pcre_get_stringnumber(re, (char *)copynamesptr);
632                                 copynamesptr = npp;
633                             }
634                             else if (*p == '+')
635                             {
636                                 p++;
637                             }
638                             else if (*p == '-')
639                             {
640                                 p++;
641                             }
642                             else if (*p == '!')
643                             {
644                                 callout_fail_id = 0;
645                                 p++;
646                                 while (isdigit(*p))
647                                 {
648                                     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
649                                 }
650                                 callout_fail_count = 0;
651                                 if (*p == '!')
652                                 {
653                                     p++;
654                                     while (isdigit(*p))
655                                     {
656                                         callout_fail_count = callout_fail_count * 10 + *p++ - '0';
657                                     }
658                                 }
659                             }
660                             else if (*p == '*')
661                             {
662                                 int sign = 1;
663                                 callout_data = 0;
664                                 if (*(++p) == '-')
665                                 {
666                                     sign = -1;
667                                     p++;
668                                 }
669                                 while (isdigit(*p))
670                                 {
671                                     callout_data = callout_data * 10 + *p++ - '0';
672                                 }
673                                 callout_data *= sign;
674                                 callout_data_set = 1;
675                             }
676                             continue;
677                         case 'G':
678                             if (isdigit(*p))
679                             {
680                                 while (isdigit(*p))
681                                 {
682                                     n = n * 10 + *p++ - '0';
683                                 }
684                                 getstrings |= 1 << n;
685                             }
686                             else if (isalnum(*p))
687                             {
688                                 char *npp = getnamesptr;
689                                 while (isalnum(*p))
690                                 {
691                                     *npp++ = *p++;
692                                 }
693                                 *npp++ = 0;
694                                 *npp = 0;
695                                 pcre_get_stringnumber(re, (char *)getnamesptr);
696                                 getnamesptr = npp;
697                             }
698                             continue;
699                         case 'L':
700                             continue;
701                         case 'M':
702                             find_match_limit = 1;
703                             continue;
704                         case 'N':
705                             options |= PCRE_NOTEMPTY;
706                             continue;
707                         case 'O':
708                             while (isdigit(*p))
709                             {
710                                 n = n * 10 + *p++ - '0';
711                             }
712                             if (n > size_offsets_max)
713                             {
714                                 size_offsets_max = n;
715                                 if (offsets)
716                                 {
717                                     FREE(offsets);
718                                 }
719                                 use_offsets = offsets = (int *)MALLOC(size_offsets_max * sizeof(int));
720                             }
721                             use_size_offsets = n;
722                             if (n == 0)
723                             {
724                                 use_offsets = NULL;    /* Ensures it can't write to it */
725                             }
726                             continue;
727                         case 'P':
728                             options |= PCRE_PARTIAL;
729                             continue;
730                         case 'Q':
731                             while (isdigit(*p))
732                             {
733                                 n = n * 10 + *p++ - '0';
734                             }
735                             if (extra == NULL)
736                             {
737                                 extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
738                                 extra->flags = 0;
739                             }
740                             extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
741                             extra->match_limit_recursion = n;
742                             continue;
743                         case 'q':
744                             while (isdigit(*p))
745                             {
746                                 n = n * 10 + *p++ - '0';
747                             }
748                             if (extra == NULL)
749                             {
750                                 extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
751                                 extra->flags = 0;
752                             }
753                             extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
754                             extra->match_limit = n;
755                             continue;
756 #if !defined NODFA
757                         case 'R':
758                             options |= PCRE_DFA_RESTART;
759                             continue;
760 #endif
761                         case 'S':
762 
763                             continue;
764                         case 'Z':
765                             options |= PCRE_NOTEOL;
766                             continue;
767                         case '?':
768                             options |= PCRE_NO_UTF8_CHECK;
769                             continue;
770                         case '<':
771                         {
772                             while (*p++ != '>')
773                             {
774                                 ;
775                             }
776                         }
777                         continue;
778                     }
779                 *q++ = (char)c;
780             }
781             *q = 0;
782             len = (int)(q - buffer);
783             if ((all_use_dfa || use_dfa) && find_match_limit)
784             {
785                 if (buffer)
786                 {
787                     FREE(buffer);
788                     buffer = NULL;
789                 }
790                 if (offsets)
791                 {
792                     FREE(offsets);
793                     offsets = NULL;
794                 }
795                 if (p)
796                 {
797                     FREE(p);
798                     p = NULL;
799                 }
800                 if (re)
801                 {
802                     (*pcre_free)(re);
803                     re = NULL;
804                 }
805                 if (tables)
806                 {
807                     (*pcre_free)((void*)tables);
808                     tables = NULL;
809                 }
810                 if (extra)
811                 {
812                     FREE(extra);
813                     extra = NULL;
814                 }
815                 return LIMIT_NOT_RELEVANT_FOR_DFA_MATCHING;
816             }
817             /* Handle matching via the POSIX interface, which does not
818             support timing or playing with the match limit or callout data. */
819             for (;; gmatched++)    /* Loop for /g or /G */
820             {
821 
822                 /* If find_match_limit is set, we want to do repeated matches with
823                 varying limits in order to find the minimum value for the match limit and
824                 for the recursion limit. */
825 
826                 if (find_match_limit)
827                 {
828                     if (extra == NULL)
829                     {
830                         extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
831                         extra->flags = 0;
832                     }
833 
834                     (void)check_match_limit(re, extra, bptr, len, start_offset,
835                                             options | g_notempty, use_offsets, use_size_offsets,
836                                             PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
837                                             PCRE_ERROR_MATCHLIMIT);
838 
839                     count = check_match_limit(re, extra, bptr, len, start_offset,
840                                               options | g_notempty, use_offsets, use_size_offsets,
841                                               PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
842                                               PCRE_ERROR_RECURSIONLIMIT);
843                 }
844                 /* If callout_data is set, use the interface with additional data */
845                 else if (callout_data_set)
846                 {
847                     if (extra == NULL)
848                     {
849                         extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
850                         extra->flags = 0;
851                     }
852                     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
853                     extra->callout_data = &callout_data;
854                     count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
855                                       options | g_notempty, use_offsets, use_size_offsets);
856 
857                     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
858                 }
859                 /* The normal case is just to do the match once, with the default
860                 value of match_limit. */
861                 else
862                 {
863                     count = pcre_exec(re, extra, (char *)bptr, len,
864                                       start_offset, options | g_notempty, use_offsets, use_size_offsets);
865                     if (count == 0)
866                     {
867                         count = use_size_offsets / 3;
868                     }
869 
870                     //to retrieve backref count and values
871                     if (count > 0 && _pstCapturedString != NULL && _piCapturedStringCount != NULL)
872                     {
873                         int i = 0;
874                         int iErr = 0;
875 
876                         iErr = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, _piCapturedStringCount);
877                         //sciprint("PCRE_INFO_CAPTURECOUNT %d\n", *_piCapturedStringCount);
878 
879                         if (*_piCapturedStringCount > 0)
880                         {
881                             *_pstCapturedString = (char**)MALLOC(sizeof(char*) * *_piCapturedStringCount);
882                             for (i = 0 ; i < *_piCapturedStringCount ; i++)
883                             {
884                                 const char* pstSubstring = NULL;
885                                 pcre_get_substring(bptr, use_offsets, count, i + 1, &pstSubstring);
886                                 if (pstSubstring != NULL)
887                                 {
888                                     (*_pstCapturedString)[i] = os_strdup(pstSubstring);
889                                 }
890                                 else
891                                 {
892                                     //empty string is matching, so create it
893                                     (*_pstCapturedString)[i] = os_strdup("");
894                                 }
895 
896                                 pcre_free_substring(pstSubstring);
897                             }
898                         }
899                     }
900                 }
901                 /* Matched */
902                 if (count >= 0)
903                 {
904                     int i, maxcount;
905                     maxcount = use_size_offsets / 3;
906                     /* This is a check against a lunatic return value. */
907                     if (count > maxcount)
908                     {
909                         if (buffer)
910                         {
911                             FREE(buffer);
912                             buffer = NULL;
913                         }
914                         if (offsets)
915                         {
916                             FREE(offsets);
917                             offsets = NULL;
918                         }
919                         if (re)
920                         {
921                             (*pcre_free)(re);
922                             re = NULL;
923                         }
924                         if (tables)
925                         {
926                             (*pcre_free)((void*)tables);
927                             tables = NULL;
928                         }
929                         if (extra)
930                         {
931                             FREE(extra);
932                             extra = NULL;
933                         }
934                         if (back_p)
935                         {
936                             FREE(back_p);
937                             back_p = NULL;
938                         }
939                         return TOO_BIG_FOR_OFFSET_SIZE;
940                     }
941 
942                     for (i = 0; i < count * 2; i += 2)
943                     {
944                         if (use_offsets[i] >= 0)
945                         {
946                             *Output_Start = use_offsets[i];
947                             *Output_End = use_offsets[i + 1];
948                             if (buffer)
949                             {
950                                 FREE(buffer);
951                             }
952 
953                             /* use_offsets = offsets no need to free use_offsets if we free offsets */
954                             if (offsets)
955                             {
956                                 FREE(offsets);
957                             }
958 
959                             /* "re" allocated by pcre_compile (better to use free function associated)*/
960                             if (re)
961                             {
962                                 (*pcre_free)(re);
963                             }
964 
965                             if (extra)
966                             {
967                                 FREE(extra);
968                             }
969                             if (tables)
970                             {
971                                 /* "tables" allocated by pcre_maketables (better to use free function associated to pcre)*/
972                                 (*pcre_free)((void *)tables);
973                                 tables = NULL;
974                                 setlocale(LC_CTYPE, "C");
975                             }
976 
977                             if (back_p)
978                             {
979                                 FREE(back_p);
980                                 back_p = NULL;
981                             }
982                             return PCRE_FINISHED_OK;
983                         }
984                     }
985 
986                     for (copynamesptr = copynames; *copynamesptr != 0; copynamesptr += (int)strlen((char*)copynamesptr) + 1)
987                     {
988                         char copybuffer[256];
989                         pcre_copy_named_substring(re, (char *)bptr, use_offsets, count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
990                     }
991 
992                     for (i = 0; i < 32; i++)
993                     {
994                         if ((getstrings & (1 << i)) != 0)
995                         {
996                             const char *substring;
997                             pcre_get_substring((char *)bptr, use_offsets, count, i, &substring);
998                         }
999                     }
1000 
1001                     for (getnamesptr = getnames; *getnamesptr != 0; getnamesptr += (int)strlen((char*)getnamesptr) + 1)
1002                     {
1003                         const char *substring;
1004                         pcre_get_named_substring(re, (char *)bptr, use_offsets, count, (char *)getnamesptr, &substring);
1005                     }
1006 
1007                 }
1008                 /* Failed to match. If this is a /g or /G loop and we previously set
1009                 g_notempty after a null match, this is not necessarily the end. We want
1010                 to advance the start offset, and continue. We won't be at the end of the
1011                 string - that was checked before setting g_notempty.
1012                 Complication arises in the case when the newline option is "any" or
1013                 "anycrlf". If the previous match was at the end of a line terminated by
1014                 CRLF, an advance of one character just passes the \r, whereas we should
1015                 prefer the longer newline sequence, as does the code in pcre_exec().
1016                 Fudge the offset value to achieve this.
1017 
1018                 Otherwise, in the case of UTF-8 matching, the advance must be one
1019                 character, not one byte. */
1020                 else
1021                 {
1022                     if (count == PCRE_ERROR_NOMATCH)
1023                     {
1024                         if (gmatched == 0)
1025                         {
1026                             if (tables)
1027                             {
1028                                 (*pcre_free)((void *)tables);
1029                                 tables = NULL;
1030                             }
1031                             if (re)
1032                             {
1033                                 (*pcre_free)((void *)re);
1034                                 re = NULL;
1035                             }
1036                             if (buffer)
1037                             {
1038                                 FREE(buffer);
1039                                 buffer = NULL;
1040                             }
1041                             if (offsets)
1042                             {
1043                                 FREE(offsets);
1044                             }
1045                             if (p)
1046                             {
1047                                 FREE(back_p);
1048                                 back_p = NULL;
1049                             }
1050                             return NO_MATCH;
1051                         }
1052                     }
1053 
1054                     if (count == PCRE_ERROR_MATCHLIMIT )
1055                     {
1056                         if (tables)
1057                         {
1058                             (*pcre_free)((void *)tables);
1059                             tables = NULL;
1060                         }
1061                         if (re)
1062                         {
1063                             (*pcre_free)((void *)re);
1064                             re = NULL;
1065                         }
1066                         if (buffer)
1067                         {
1068                             FREE(buffer);
1069                             buffer = NULL;
1070                         }
1071                         if (offsets)
1072                         {
1073                             FREE(offsets);
1074                             offsets = NULL;
1075                         }
1076                         if (back_p)
1077                         {
1078                             FREE(back_p);
1079                             back_p = NULL;
1080                         }
1081                         return MATCH_LIMIT;
1082                     }
1083                     break;  /* Out of loop */
1084                 }
1085 
1086                 /* If not /g or /G we are done */
1087                 if (!do_g && !do_G)
1088                 {
1089                     break;
1090                 }
1091 
1092                 /* If we have matched an empty string, first check to see if we are at
1093                 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1094                 what Perl's /g options does. This turns out to be rather cunning. First
1095                 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1096                 same point. If this fails (picked up above) we advance to the next
1097                 character. */
1098 
1099                 g_notempty = 0;
1100 
1101                 if (use_offsets[0] == use_offsets[1])
1102                 {
1103                     if (use_offsets[0] == len)
1104                     {
1105                         break;
1106                     }
1107                     g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1108                 }
1109 
1110                 /* For /g, update the start offset, leaving the rest alone */
1111 
1112                 if (do_g)
1113                 {
1114                     start_offset = use_offsets[1];
1115                 }
1116                 /* For /G, update the pointer and length */
1117                 else
1118                 {
1119                     bptr += use_offsets[1];
1120                     len -= use_offsets[1];
1121                 }
1122             }  /* End of loop for /g and /G */
1123 
1124             if (re)
1125             {
1126                 (*pcre_free)(re);
1127                 re = NULL;
1128             }
1129             if (extra)
1130             {
1131                 FREE(extra);
1132                 extra = NULL;
1133             }
1134             if (tables)
1135             {
1136                 (*pcre_free)((void *)tables);
1137                 tables = NULL;
1138             }
1139 
1140             FREE(back_p);
1141             back_p = NULL;
1142             continue;
1143         }    /* End of loop for data lines */
1144     }
1145 
1146     if (buffer)
1147     {
1148         FREE(buffer);
1149         buffer = NULL;
1150     }
1151     if (offsets)
1152     {
1153         FREE(offsets);
1154         offsets = NULL;
1155     }
1156 
1157     return PCRE_EXIT;
1158 }
1159 /*-------------------------------------------------------------------------------*/
wide_pcre_private(const wchar_t * _pwstInput,const wchar_t * _pwstPattern,int * _piStart,int * _piEnd,wchar_t *** _pstCapturedString,int * _piCapturedStringCount)1160 pcre_error_code wide_pcre_private(const wchar_t* _pwstInput, const wchar_t* _pwstPattern, int* _piStart, int* _piEnd, wchar_t*** _pstCapturedString, int* _piCapturedStringCount)
1161 {
1162     pcre_error_code iPcreStatus = PCRE_FINISHED_OK;
1163     int i               = 0;
1164     int iStart          = 0;
1165     int iEnd            = 0;
1166 
1167     char* pstInput      = wide_string_to_UTF8(_pwstInput);
1168     char* pstPattern    = wide_string_to_UTF8(_pwstPattern);
1169     char** pstCaptured  = NULL;//(char**)MALLOC(sizeof(char*) * (strlen(pstInput) + 1));
1170 
1171     iPcreStatus = pcre_private(pstInput, pstPattern, &iStart, &iEnd, &pstCaptured, _piCapturedStringCount);
1172     if (iPcreStatus == PCRE_FINISHED_OK && iStart != iEnd)
1173     {
1174         char* pstTempStart      = NULL;
1175         char* pstTempEnd        = NULL;
1176         wchar_t* pwstTempStart  = NULL;
1177         wchar_t* pwstTempEnd    = NULL;
1178 
1179         pstTempStart            = os_strdup(pstInput);
1180         pstTempEnd              = os_strdup(pstInput);
1181         pstTempEnd[iEnd]        = 0;
1182         pstTempStart[iStart]    = 0;
1183 
1184 
1185         pwstTempStart           = to_wide_string(pstTempStart);
1186         pwstTempEnd             = to_wide_string(pstTempEnd);
1187 
1188         *_piStart               = (int)wcslen(pwstTempStart);
1189         *_piEnd                 = (int)wcslen(pwstTempEnd);
1190 
1191         if (_piCapturedStringCount && *_piCapturedStringCount > 0)
1192         {
1193             /*convert captured field in wide char*/
1194             *_pstCapturedString = (wchar_t**)MALLOC(sizeof(wchar_t*) * *_piCapturedStringCount);
1195             for (i = 0 ; i < *_piCapturedStringCount ; i++)
1196             {
1197                 (*_pstCapturedString)[i] = to_wide_string(pstCaptured[i]);
1198             }
1199             freeArrayOfString(pstCaptured, *_piCapturedStringCount);
1200         }
1201 
1202         FREE(pstTempStart);
1203         FREE(pstTempEnd);
1204         FREE(pwstTempStart);
1205         FREE(pwstTempEnd);
1206     }
1207     else
1208     {
1209         *_piStart   = iStart;
1210         *_piEnd     = iEnd;
1211         if (_piCapturedStringCount && *_piCapturedStringCount > 0)
1212         {
1213             /*free unused captured field*/
1214             freeArrayOfString(pstCaptured, *_piCapturedStringCount);
1215         }
1216     }
1217 
1218     FREE(pstInput);
1219     FREE(pstPattern);
1220     return iPcreStatus;
1221 }
1222 /*-------------------------------------------------------------------------------*/
1223