1 /*
2 * Scilab ( http://www.scilab.org/ ) - This file is part of Scilab
3 * Copyright (C) INRIA
4 * Copyright (C) DIGITEO - 2009
5 *
6 * Copyright (C) 2012 - 2016 - Scilab Enterprises
7 *
8 * This file is hereby licensed under the terms of the GNU GPL v2.0,
9 * pursuant to article 5.3.4 of the CeCILL v.2.1.
10 * This file was originally licensed under the terms of the CeCILL v2.1,
11 * and continues to be available under such terms.
12 * For more information, see the COPYING file which you should have received
13 * along with this program.
14 *
15 */
16
17 /*-------------------------------------------------------------------------------*/
18 #include <ctype.h>
19 #include <stdio.h>
20 #include <string.h>
21 #include <stdlib.h>
22 #include <time.h>
23 #include <locale.h>
24 #include <errno.h>
25 #include <pcre.h>
26 #include "sci_malloc.h"
27 #include "BOOL.h"
28 #include "pcre_private.h"
29 #include "os_string.h"
30 #include "strsubst.h"
31 #include "configvariable_interface.h"
32 #include "sciprint.h"
33 #include "charEncoding.h"
34 #include "freeArrayOfString.h"
35 /*-------------------------------------------------------------------------------*/
36 /* A number of things vary for Windows builds. Originally, pcretest opened its
37 input and output without "b"; then I was told that "b" was needed in some
38 environments, so it was added for release 5.0 to both the input and output. (It
39 makes no difference on Unix-like systems.) Later I was told that it is wrong
40 for the input on Windows. I've now abstracted the modes into two macros that
41 are set here, to make it easier to fiddle with them, and removed "b" from the
42 input mode under Windows. */
43 /*-------------------------------------------------------------------------------*/
44 #if _MSC_VER
45 #include <io.h> /* For _setmode() */
46 #include <fcntl.h> /* For _O_BINARY */
47 #define INPUT_MODE "r"
48 #define OUTPUT_MODE "wb"
49 #else
50 #include <sys/time.h> /* These two includes are needed */
51 #include <sys/resource.h> /* for setrlimit(). */
52 #define INPUT_MODE "rb"
53 #define OUTPUT_MODE "wb"
54 #endif
55
56 #define LINK_SIZE 2
57
58 /* We have to include pcre_internal.h because we need the internal info for
59 displaying the results of pcre_study() and we also need to know about the
60 internal macros, structures, and other internal data values; pcretest has
61 "inside information" compared to a program that strictly follows the PCRE API.
62
63 Although pcre_internal.h does itself include pcre.h, we explicitly include it
64 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
65 appropriately for an application, not for building PCRE. */
66
67 #include <pcre.h>
68 #include "pcre_internal.h"
69
70 /* We need access to the data tables that PCRE uses. So as not to have to keep
71 two copies, we include the source file here, changing the names of the external
72 symbols to prevent clashes. */
73
74 #define _pcre_utf8_table1 utf8_table1
75 #define _pcre_utf8_table1_size utf8_table1_size
76 #define _pcre_utf8_table2 utf8_table2
77 #define _pcre_utf8_table3 utf8_table3
78 #define _pcre_utf8_table4 utf8_table4
79 #define _pcre_utt utt
80 #define _pcre_utt_size utt_size
81 #define _pcre_utt_names utt_names
82 #define _pcre_OP_lengths OP_lengths
83
84 #include "pcre_tables.c"
85
86 /* It is possible to compile this test program without including support for
87 testing the POSIX interface, though this is not available via the standard
88 Makefile. */
89
90 #if !defined NOPOSIX
91 #include "pcreposix.h"
92 #endif
93
94 /* It is also possible, for the benefit of the version currently imported into
95 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
96 interface to the DFA matcher (NODFA), and without the doublecheck of the old
97 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
98 UTF8 support if PCRE is built without it. */
99
100 #ifndef SUPPORT_UTF8
101 #ifndef NOUTF8
102 #define NOUTF8
103 #endif
104 #endif
105 /*-------------------------------------------------------------------------------*/
106 /* File-scope state. The three callout_* variables are reset by pcre_private() before it decodes the subject; the "\C!" subject escape overwrites callout_fail_id and callout_fail_count. */
107
108 static int callout_count = 0;
109 static int callout_fail_count = 0;
110 static int callout_fail_id = 0;
111
112 /* Working copy of the subject string for the call in progress: allocated by strsub() in pcre_private() and released before that function returns. */
113
114 char *buffer = NULL;
115
116 /* Forward declaration of the limit-search helper defined just below. */
117 static int check_match_limit(pcre *re, pcre_extra *extra, char *bptr, int len,
118 int start_offset, int options, int *use_offsets, int use_size_offsets,
119 int flag, unsigned long int *limit, int errnumber);
120
121
122 /*************************************************
123 * Check match or recursion limit *
124 *************************************************/
125
/* Search for the smallest value of *limit with which pcre_exec() no longer
   fails with errnumber.

   flag is OR-ed into extra->flags for the duration of the search and cleared
   again before returning. limit must point at the pcre_extra field that flag
   enables. The search starts at 64, doubles while the limit is still too
   small, then bisects between the largest failing and the smallest passing
   value.

   Returns the result of the last pcre_exec() call. */
static int check_match_limit(pcre *re, pcre_extra *extra, char *bptr, int len,
                             int start_offset, int options, int *use_offsets, int use_size_offsets,
                             int flag, unsigned long int *limit, int errnumber)
{
    int rc = 0;
    int too_small = 0;   /* largest limit seen to fail with errnumber */
    int probe = 64;      /* limit tried on the current iteration */
    int enough = -1;     /* smallest limit seen to pass, -1 while unknown */
    int searching = 1;

    extra->flags |= flag;

    while (searching)
    {
        *limit = probe;

        rc = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
                       use_offsets, use_size_offsets);

        if (rc == errnumber)
        {
            /* Limit hit: raise the lower bound and pick the next probe. */
            too_small = probe;
            if (probe == enough - 1)
            {
                probe = enough;
            }
            else if (enough > 0)
            {
                probe = (too_small + enough) / 2;
            }
            else
            {
                probe = probe * 2;
            }
        }
        else if (rc < 0 && rc != PCRE_ERROR_NOMATCH && rc != PCRE_ERROR_PARTIAL)
        {
            /* Some other error */
            searching = 0;
        }
        else if (probe == too_small + 1)
        {
            /* The limit just below fails and this one passes: minimum found. */
            searching = 0;
        }
        else
        {
            /* Limit was sufficient: lower the upper bound and bisect. */
            enough = probe;
            probe = (too_small + probe) / 2;
        }
    }

    extra->flags &= ~flag;
    return rc;
}
169
170
171 /*************************************************
172 * Algorithm *
173 *************************************************/
174
175 /* Match one regular expression against one subject string. INPUT_PAT uses the
176 pcretest layout: a delimiter, the expression, the same delimiter, then optional
177 option letters. INPUT_LINE is the subject string. */
178
pcre_private(const char * INPUT_LINE,const char * INPUT_PAT,int * Output_Start,int * Output_End,char *** _pstCapturedString,int * _piCapturedStringCount)179 pcre_error_code pcre_private(const char *INPUT_LINE, const char *INPUT_PAT, int *Output_Start, int *Output_End, char*** _pstCapturedString, int* _piCapturedStringCount)
180 {
181 /* ALL strings are managed as UTF-8 by default */
182 int options = PCRE_UTF8;
183 int size_offsets = 45;
184 int size_offsets_max;
185 int *offsets = NULL;
186 int all_use_dfa = 0;
187 BOOL LOOP_PCRE_TST = FALSE;
188
189 /* These vectors store, end-to-end, a list of captured substring names. Assume
190 that 1024 is plenty long enough for the few names we'll be testing. */
191
192 char copynames[1024];
193 char getnames[1024];
194
195 char *copynamesptr = NULL;
196 char *getnamesptr = NULL;
197
198 int rc = 0;
199 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
200 if (rc != 1)
201 {
202 return UTF8_NOT_SUPPORTED;
203 }
204
205 /* bug 3891 */
206 /* backslash characters are not interpreted for input */
207 buffer = strsub(INPUT_LINE, "\\", "\\\\");
208
209 size_offsets_max = size_offsets;
210 offsets = (int *)MALLOC(size_offsets_max * sizeof(int));
211 if (offsets == NULL)
212 {
213 if (buffer)
214 {
215 FREE(buffer);
216 buffer = NULL;
217 }
218 return NOT_ENOUGH_MEMORY_FOR_VECTOR;
219 }
220 /* Main loop */
221 LOOP_PCRE_TST = FALSE;
222 while (!LOOP_PCRE_TST)
223 {
224 pcre *re = NULL;
225 pcre_extra *extra = NULL;
226 const char *error = NULL;
227 char *back_p = NULL;
228 char *p = NULL;
229 char *pp = NULL;
230 char *ppp = NULL;
231 const unsigned char *tables = NULL;
232 int do_G = 0;
233 int do_g = 0;
234 int erroroffset = 0, len = 0, delimiter;
235
236 LOOP_PCRE_TST = TRUE;
237 p = os_strdup(INPUT_PAT);
238 back_p = p;
239 while (isspace(*p))
240 {
241 p++;
242 }
243 if (*p == 0)
244 {
245 FREE(back_p);
246 continue;
247 }
248 /* In-line pattern (the usual case). Get the delimiter and seek the end of
249 the pattern; if is isn't complete, read more. */
250
251 delimiter = *p++;
252
253 if (isalnum(delimiter) || delimiter == '\\')
254 {
255 if (buffer)
256 {
257 FREE(buffer);
258 buffer = NULL;
259 }
260 if (offsets)
261 {
262 FREE(offsets);
263 offsets = NULL;
264 }
265 if (back_p)
266 {
267 FREE(back_p);
268 back_p = NULL;
269 }
270 return DELIMITER_NOT_ALPHANUMERIC;
271 }
272
273 pp = p;
274
275 while (*pp != 0)
276 {
277 if (*pp == '\\' && pp[1] != 0)
278 {
279 pp++;
280 }
281 else if (*pp == delimiter)
282 {
283 break;
284 }
285 pp++;
286 }
287
288 /* If the delimiter can't be found, it's a syntax error */
289 if (*pp == 0)
290 {
291 if (buffer)
292 {
293 FREE(buffer);
294 buffer = NULL;
295 }
296 if (offsets)
297 {
298 FREE(offsets);
299 offsets = NULL;
300 }
301 if (back_p)
302 {
303 FREE(back_p);
304 back_p = NULL;
305 }
306 if (offsets)
307 {
308 FREE(offsets);
309 }
310 return CAN_NOT_COMPILE_PATTERN;
311 }
312
313 /* If the first character after the delimiter is backslash, make
314 the pattern end with backslash. This is purely to provide a way
315 of testing for the error message when a pattern ends with backslash. */
316
317 if (pp[1] == '\\')
318 {
319 *pp++ = '\\';
320 }
321
322 /* Terminate the pattern at the delimiter, and save a copy of the pattern
323 for callouts. */
324
325 *pp++ = 0;
326
327 /* Look for options after final delimiter */
328
329 //options = 8192;
330
331 while (*pp != 0)
332 {
333 switch (*pp++)
334 {
335 case 'f':
336 options |= PCRE_FIRSTLINE;
337 break;
338 case 'g':
339 do_g = 1;
340 break;
341 case 'i':
342 options |= PCRE_CASELESS;
343 break;
344 case 'm':
345 options |= PCRE_MULTILINE;
346 break;
347 case 's':
348 options |= PCRE_DOTALL;
349 break;
350 case 'x':
351 options |= PCRE_EXTENDED;
352 break;
353 case '+':
354 break;
355 case 'A':
356 options |= PCRE_ANCHORED;
357 break;
358 case 'B':
359 break;
360 case 'C':
361 options |= PCRE_AUTO_CALLOUT;
362 break;
363 case 'D':
364 break;
365 case 'E':
366 options |= PCRE_DOLLAR_ENDONLY;
367 break;
368 case 'F':
369 break;
370 case 'G':
371 do_G = 1;
372 break;
373 case 'I':
374 break;
375 case 'J':
376 options |= PCRE_DUPNAMES;
377 break;
378 case 'M':
379 break;
380 case 'N':
381 options |= PCRE_NO_AUTO_CAPTURE;
382 break;
383 case 'S':
384 break;
385 case 'U':
386 options |= PCRE_UNGREEDY;
387 break;
388 case 'X':
389 options |= PCRE_EXTRA;
390 break;
391 case 'Z':
392 break;
393 case '8':
394 {
395 int rc = 0;
396 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
397 if (rc != 1)
398 {
399 if (buffer)
400 {
401 FREE(buffer);
402 buffer = NULL;
403 }
404 if (back_p)
405 {
406 FREE(back_p);
407 back_p = NULL;
408 }
409 if (offsets)
410 {
411 FREE(offsets);
412 }
413 return UTF8_NOT_SUPPORTED;
414 }
415 options |= PCRE_UTF8;
416 }
417 break;
418 case '?':
419 options |= PCRE_NO_UTF8_CHECK;
420 break;
421 case 'L':
422 ppp = pp;
423 /* The '\r' test here is so that it works on Windows. */
424 /* The '0' test is just in case this is an unterminated line. */
425 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ')
426 {
427 ppp++;
428 }
429 *ppp = 0;
430 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
431 {
432 goto SKIP_DATA;
433 }
434
435 tables = pcre_maketables();
436 pp = ppp;
437 break;
438 case '>':
439 while (*pp != 0)
440 {
441 pp++;
442 }
443 while (isspace(pp[-1]))
444 {
445 pp--;
446 }
447 *pp = 0;
448 break;
449 case '<':
450 {
451 while (*pp++ != '>')
452 {
453 ;
454 }
455 }
456 break;
457 case '\r': /* So that it works in Windows */
458 case '\n':
459 case ' ':
460 break;
461
462 default:
463 goto SKIP_DATA;
464 }
465 }
466
467 /* Handle compiling via the POSIX interface, which doesn't support the
468 timing, showing, or debugging options, nor the ability to pass over
469 local character tables. */
470
471
472 {
473 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
474 /* Compilation failed; go back for another re, skipping to blank line
475 if non-interactive. */
476 if (re == NULL)
477 {
478 SKIP_DATA:
479 if (buffer)
480 {
481 FREE(buffer);
482 buffer = NULL;
483 }
484 if (offsets)
485 {
486 FREE(offsets);
487 offsets = NULL;
488 }
489 if (tables)
490 {
491 (*pcre_free)((void*)tables);
492 tables = NULL;
493 }
494 if (extra)
495 {
496 FREE(extra);
497 extra = NULL;
498 }
499 if (back_p)
500 {
501 FREE(back_p);
502 back_p = NULL;
503 }
504 return CAN_NOT_COMPILE_PATTERN;
505 }
506
507 } /* End of non-POSIX compile */
508
509 /* Read data lines and test them */
510 {
511 char *q = NULL;
512 char *bptr = NULL;
513 int *use_offsets = offsets;
514 int use_size_offsets = size_offsets;
515 int callout_data = 0;
516 int callout_data_set = 0;
517 int count = 0;
518 int c = 0;
519 int copystrings = 0;
520 int find_match_limit = 0;
521 int getstrings = 0;
522 int gmatched = 0;
523 int start_offset = 0;
524 int g_notempty = 0;
525 int use_dfa = 0;
526
527 options = 0;
528 *copynames = 0;
529 *getnames = 0;
530
531 copynamesptr = copynames;
532 getnamesptr = getnames;
533
534 callout_count = 0;
535 callout_fail_count = 999999;
536 callout_fail_id = -1;
537
538 if (extra != NULL)
539 {
540 extra->flags &= ~(PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION);
541 }
542 p = buffer;
543 bptr = q = buffer;
544 while ((c = *p++) != 0)
545 {
546 int i = 0;
547 int n = 0;
548
549 if (c == '\\') switch ((c = *p++))
550 {
551 case 'a':
552 c = 7;
553 break;
554 case 'b':
555 c = '\b';
556 break;
557 case 'e':
558 c = 27;
559 break;
560 case 'f':
561 c = '\f';
562 break;
563 case 'n':
564 c = '\n';
565 break;
566 case 'r':
567 c = '\r';
568 break;
569 case 't':
570 c = '\t';
571 break;
572 case 'v':
573 c = '\v';
574 break;
575 case '0':
576 case '1':
577 case '2':
578 case '3':
579 case '4':
580 case '5':
581 case '6':
582 case '7':
583 c -= '0';
584 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
585 {
586 c = c * 8 + *p++ - '0';
587 }
588 break;
589 case 'x':
590 /* Ordinary \x */
591 c = 0;
592 while (i++ < 2 && isxdigit(*p))
593 {
594 c = c * 16 + tolower(*p) - ((isdigit(*p)) ? '0' : 'W');
595 p++;
596 }
597 break;
598 case 0: /* \ followed by EOF allows for an empty line */
599 p--;
600 continue;
601 case '>':
602 while (isdigit(*p))
603 {
604 start_offset = start_offset * 10 + *p++ - '0';
605 }
606 continue;
607 case 'A': /* Option setting */
608 options |= PCRE_ANCHORED;
609 continue;
610 case 'B':
611 options |= PCRE_NOTBOL;
612 continue;
613 case 'C':
614 if (isdigit(*p)) /* Set copy string */
615 {
616 while (isdigit(*p))
617 {
618 n = n * 10 + *p++ - '0';
619 }
620 copystrings |= 1 << n;
621 }
622 else if (isalnum(*p))
623 {
624 char *npp = copynamesptr;
625 while (isalnum(*p))
626 {
627 *npp++ = *p++;
628 }
629 *npp++ = 0;
630 *npp = 0;
631 pcre_get_stringnumber(re, (char *)copynamesptr);
632 copynamesptr = npp;
633 }
634 else if (*p == '+')
635 {
636 p++;
637 }
638 else if (*p == '-')
639 {
640 p++;
641 }
642 else if (*p == '!')
643 {
644 callout_fail_id = 0;
645 p++;
646 while (isdigit(*p))
647 {
648 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
649 }
650 callout_fail_count = 0;
651 if (*p == '!')
652 {
653 p++;
654 while (isdigit(*p))
655 {
656 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
657 }
658 }
659 }
660 else if (*p == '*')
661 {
662 int sign = 1;
663 callout_data = 0;
664 if (*(++p) == '-')
665 {
666 sign = -1;
667 p++;
668 }
669 while (isdigit(*p))
670 {
671 callout_data = callout_data * 10 + *p++ - '0';
672 }
673 callout_data *= sign;
674 callout_data_set = 1;
675 }
676 continue;
677 case 'G':
678 if (isdigit(*p))
679 {
680 while (isdigit(*p))
681 {
682 n = n * 10 + *p++ - '0';
683 }
684 getstrings |= 1 << n;
685 }
686 else if (isalnum(*p))
687 {
688 char *npp = getnamesptr;
689 while (isalnum(*p))
690 {
691 *npp++ = *p++;
692 }
693 *npp++ = 0;
694 *npp = 0;
695 pcre_get_stringnumber(re, (char *)getnamesptr);
696 getnamesptr = npp;
697 }
698 continue;
699 case 'L':
700 continue;
701 case 'M':
702 find_match_limit = 1;
703 continue;
704 case 'N':
705 options |= PCRE_NOTEMPTY;
706 continue;
707 case 'O':
708 while (isdigit(*p))
709 {
710 n = n * 10 + *p++ - '0';
711 }
712 if (n > size_offsets_max)
713 {
714 size_offsets_max = n;
715 if (offsets)
716 {
717 FREE(offsets);
718 }
719 use_offsets = offsets = (int *)MALLOC(size_offsets_max * sizeof(int));
720 }
721 use_size_offsets = n;
722 if (n == 0)
723 {
724 use_offsets = NULL; /* Ensures it can't write to it */
725 }
726 continue;
727 case 'P':
728 options |= PCRE_PARTIAL;
729 continue;
730 case 'Q':
731 while (isdigit(*p))
732 {
733 n = n * 10 + *p++ - '0';
734 }
735 if (extra == NULL)
736 {
737 extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
738 extra->flags = 0;
739 }
740 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
741 extra->match_limit_recursion = n;
742 continue;
743 case 'q':
744 while (isdigit(*p))
745 {
746 n = n * 10 + *p++ - '0';
747 }
748 if (extra == NULL)
749 {
750 extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
751 extra->flags = 0;
752 }
753 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
754 extra->match_limit = n;
755 continue;
756 #if !defined NODFA
757 case 'R':
758 options |= PCRE_DFA_RESTART;
759 continue;
760 #endif
761 case 'S':
762
763 continue;
764 case 'Z':
765 options |= PCRE_NOTEOL;
766 continue;
767 case '?':
768 options |= PCRE_NO_UTF8_CHECK;
769 continue;
770 case '<':
771 {
772 while (*p++ != '>')
773 {
774 ;
775 }
776 }
777 continue;
778 }
779 *q++ = (char)c;
780 }
781 *q = 0;
782 len = (int)(q - buffer);
783 if ((all_use_dfa || use_dfa) && find_match_limit)
784 {
785 if (buffer)
786 {
787 FREE(buffer);
788 buffer = NULL;
789 }
790 if (offsets)
791 {
792 FREE(offsets);
793 offsets = NULL;
794 }
795 if (p)
796 {
797 FREE(p);
798 p = NULL;
799 }
800 if (re)
801 {
802 (*pcre_free)(re);
803 re = NULL;
804 }
805 if (tables)
806 {
807 (*pcre_free)((void*)tables);
808 tables = NULL;
809 }
810 if (extra)
811 {
812 FREE(extra);
813 extra = NULL;
814 }
815 return LIMIT_NOT_RELEVANT_FOR_DFA_MATCHING;
816 }
817 /* Handle matching via the POSIX interface, which does not
818 support timing or playing with the match limit or callout data. */
819 for (;; gmatched++) /* Loop for /g or /G */
820 {
821
822 /* If find_match_limit is set, we want to do repeated matches with
823 varying limits in order to find the minimum value for the match limit and
824 for the recursion limit. */
825
826 if (find_match_limit)
827 {
828 if (extra == NULL)
829 {
830 extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
831 extra->flags = 0;
832 }
833
834 (void)check_match_limit(re, extra, bptr, len, start_offset,
835 options | g_notempty, use_offsets, use_size_offsets,
836 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
837 PCRE_ERROR_MATCHLIMIT);
838
839 count = check_match_limit(re, extra, bptr, len, start_offset,
840 options | g_notempty, use_offsets, use_size_offsets,
841 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
842 PCRE_ERROR_RECURSIONLIMIT);
843 }
844 /* If callout_data is set, use the interface with additional data */
845 else if (callout_data_set)
846 {
847 if (extra == NULL)
848 {
849 extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
850 extra->flags = 0;
851 }
852 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
853 extra->callout_data = &callout_data;
854 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
855 options | g_notempty, use_offsets, use_size_offsets);
856
857 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
858 }
859 /* The normal case is just to do the match once, with the default
860 value of match_limit. */
861 else
862 {
863 count = pcre_exec(re, extra, (char *)bptr, len,
864 start_offset, options | g_notempty, use_offsets, use_size_offsets);
865 if (count == 0)
866 {
867 count = use_size_offsets / 3;
868 }
869
870 //to retrieve backref count and values
871 if (count > 0 && _pstCapturedString != NULL && _piCapturedStringCount != NULL)
872 {
873 int i = 0;
874 int iErr = 0;
875
876 iErr = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, _piCapturedStringCount);
877 //sciprint("PCRE_INFO_CAPTURECOUNT %d\n", *_piCapturedStringCount);
878
879 if (*_piCapturedStringCount > 0)
880 {
881 *_pstCapturedString = (char**)MALLOC(sizeof(char*) * *_piCapturedStringCount);
882 for (i = 0 ; i < *_piCapturedStringCount ; i++)
883 {
884 const char* pstSubstring = NULL;
885 pcre_get_substring(bptr, use_offsets, count, i + 1, &pstSubstring);
886 if (pstSubstring != NULL)
887 {
888 (*_pstCapturedString)[i] = os_strdup(pstSubstring);
889 }
890 else
891 {
892 //empty string is matching, so create it
893 (*_pstCapturedString)[i] = os_strdup("");
894 }
895
896 pcre_free_substring(pstSubstring);
897 }
898 }
899 }
900 }
901 /* Matched */
902 if (count >= 0)
903 {
904 int i, maxcount;
905 maxcount = use_size_offsets / 3;
906 /* This is a check against a lunatic return value. */
907 if (count > maxcount)
908 {
909 if (buffer)
910 {
911 FREE(buffer);
912 buffer = NULL;
913 }
914 if (offsets)
915 {
916 FREE(offsets);
917 offsets = NULL;
918 }
919 if (re)
920 {
921 (*pcre_free)(re);
922 re = NULL;
923 }
924 if (tables)
925 {
926 (*pcre_free)((void*)tables);
927 tables = NULL;
928 }
929 if (extra)
930 {
931 FREE(extra);
932 extra = NULL;
933 }
934 if (back_p)
935 {
936 FREE(back_p);
937 back_p = NULL;
938 }
939 return TOO_BIG_FOR_OFFSET_SIZE;
940 }
941
942 for (i = 0; i < count * 2; i += 2)
943 {
944 if (use_offsets[i] >= 0)
945 {
946 *Output_Start = use_offsets[i];
947 *Output_End = use_offsets[i + 1];
948 if (buffer)
949 {
950 FREE(buffer);
951 }
952
953 /* use_offsets = offsets no need to free use_offsets if we free offsets */
954 if (offsets)
955 {
956 FREE(offsets);
957 }
958
959 /* "re" allocated by pcre_compile (better to use free function associated)*/
960 if (re)
961 {
962 (*pcre_free)(re);
963 }
964
965 if (extra)
966 {
967 FREE(extra);
968 }
969 if (tables)
970 {
971 /* "tables" allocated by pcre_maketables (better to use free function associated to pcre)*/
972 (*pcre_free)((void *)tables);
973 tables = NULL;
974 setlocale(LC_CTYPE, "C");
975 }
976
977 if (back_p)
978 {
979 FREE(back_p);
980 back_p = NULL;
981 }
982 return PCRE_FINISHED_OK;
983 }
984 }
985
986 for (copynamesptr = copynames; *copynamesptr != 0; copynamesptr += (int)strlen((char*)copynamesptr) + 1)
987 {
988 char copybuffer[256];
989 pcre_copy_named_substring(re, (char *)bptr, use_offsets, count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
990 }
991
992 for (i = 0; i < 32; i++)
993 {
994 if ((getstrings & (1 << i)) != 0)
995 {
996 const char *substring;
997 pcre_get_substring((char *)bptr, use_offsets, count, i, &substring);
998 }
999 }
1000
1001 for (getnamesptr = getnames; *getnamesptr != 0; getnamesptr += (int)strlen((char*)getnamesptr) + 1)
1002 {
1003 const char *substring;
1004 pcre_get_named_substring(re, (char *)bptr, use_offsets, count, (char *)getnamesptr, &substring);
1005 }
1006
1007 }
1008 /* Failed to match. If this is a /g or /G loop and we previously set
1009 g_notempty after a null match, this is not necessarily the end. We want
1010 to advance the start offset, and continue. We won't be at the end of the
1011 string - that was checked before setting g_notempty.
1012 Complication arises in the case when the newline option is "any" or
1013 "anycrlf". If the previous match was at the end of a line terminated by
1014 CRLF, an advance of one character just passes the \r, whereas we should
1015 prefer the longer newline sequence, as does the code in pcre_exec().
1016 Fudge the offset value to achieve this.
1017
1018 Otherwise, in the case of UTF-8 matching, the advance must be one
1019 character, not one byte. */
1020 else
1021 {
1022 if (count == PCRE_ERROR_NOMATCH)
1023 {
1024 if (gmatched == 0)
1025 {
1026 if (tables)
1027 {
1028 (*pcre_free)((void *)tables);
1029 tables = NULL;
1030 }
1031 if (re)
1032 {
1033 (*pcre_free)((void *)re);
1034 re = NULL;
1035 }
1036 if (buffer)
1037 {
1038 FREE(buffer);
1039 buffer = NULL;
1040 }
1041 if (offsets)
1042 {
1043 FREE(offsets);
1044 }
1045 if (p)
1046 {
1047 FREE(back_p);
1048 back_p = NULL;
1049 }
1050 return NO_MATCH;
1051 }
1052 }
1053
1054 if (count == PCRE_ERROR_MATCHLIMIT )
1055 {
1056 if (tables)
1057 {
1058 (*pcre_free)((void *)tables);
1059 tables = NULL;
1060 }
1061 if (re)
1062 {
1063 (*pcre_free)((void *)re);
1064 re = NULL;
1065 }
1066 if (buffer)
1067 {
1068 FREE(buffer);
1069 buffer = NULL;
1070 }
1071 if (offsets)
1072 {
1073 FREE(offsets);
1074 offsets = NULL;
1075 }
1076 if (back_p)
1077 {
1078 FREE(back_p);
1079 back_p = NULL;
1080 }
1081 return MATCH_LIMIT;
1082 }
1083 break; /* Out of loop */
1084 }
1085
1086 /* If not /g or /G we are done */
1087 if (!do_g && !do_G)
1088 {
1089 break;
1090 }
1091
1092 /* If we have matched an empty string, first check to see if we are at
1093 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1094 what Perl's /g options does. This turns out to be rather cunning. First
1095 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1096 same point. If this fails (picked up above) we advance to the next
1097 character. */
1098
1099 g_notempty = 0;
1100
1101 if (use_offsets[0] == use_offsets[1])
1102 {
1103 if (use_offsets[0] == len)
1104 {
1105 break;
1106 }
1107 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1108 }
1109
1110 /* For /g, update the start offset, leaving the rest alone */
1111
1112 if (do_g)
1113 {
1114 start_offset = use_offsets[1];
1115 }
1116 /* For /G, update the pointer and length */
1117 else
1118 {
1119 bptr += use_offsets[1];
1120 len -= use_offsets[1];
1121 }
1122 } /* End of loop for /g and /G */
1123
1124 if (re)
1125 {
1126 (*pcre_free)(re);
1127 re = NULL;
1128 }
1129 if (extra)
1130 {
1131 FREE(extra);
1132 extra = NULL;
1133 }
1134 if (tables)
1135 {
1136 (*pcre_free)((void *)tables);
1137 tables = NULL;
1138 }
1139
1140 FREE(back_p);
1141 back_p = NULL;
1142 continue;
1143 } /* End of loop for data lines */
1144 }
1145
1146 if (buffer)
1147 {
1148 FREE(buffer);
1149 buffer = NULL;
1150 }
1151 if (offsets)
1152 {
1153 FREE(offsets);
1154 offsets = NULL;
1155 }
1156
1157 return PCRE_EXIT;
1158 }
1159 /*-------------------------------------------------------------------------------*/
wide_pcre_private(const wchar_t * _pwstInput,const wchar_t * _pwstPattern,int * _piStart,int * _piEnd,wchar_t *** _pstCapturedString,int * _piCapturedStringCount)1160 pcre_error_code wide_pcre_private(const wchar_t* _pwstInput, const wchar_t* _pwstPattern, int* _piStart, int* _piEnd, wchar_t*** _pstCapturedString, int* _piCapturedStringCount)
1161 {
1162 pcre_error_code iPcreStatus = PCRE_FINISHED_OK;
1163 int i = 0;
1164 int iStart = 0;
1165 int iEnd = 0;
1166
1167 char* pstInput = wide_string_to_UTF8(_pwstInput);
1168 char* pstPattern = wide_string_to_UTF8(_pwstPattern);
1169 char** pstCaptured = NULL;//(char**)MALLOC(sizeof(char*) * (strlen(pstInput) + 1));
1170
1171 iPcreStatus = pcre_private(pstInput, pstPattern, &iStart, &iEnd, &pstCaptured, _piCapturedStringCount);
1172 if (iPcreStatus == PCRE_FINISHED_OK && iStart != iEnd)
1173 {
1174 char* pstTempStart = NULL;
1175 char* pstTempEnd = NULL;
1176 wchar_t* pwstTempStart = NULL;
1177 wchar_t* pwstTempEnd = NULL;
1178
1179 pstTempStart = os_strdup(pstInput);
1180 pstTempEnd = os_strdup(pstInput);
1181 pstTempEnd[iEnd] = 0;
1182 pstTempStart[iStart] = 0;
1183
1184
1185 pwstTempStart = to_wide_string(pstTempStart);
1186 pwstTempEnd = to_wide_string(pstTempEnd);
1187
1188 *_piStart = (int)wcslen(pwstTempStart);
1189 *_piEnd = (int)wcslen(pwstTempEnd);
1190
1191 if (_piCapturedStringCount && *_piCapturedStringCount > 0)
1192 {
1193 /*convert captured field in wide char*/
1194 *_pstCapturedString = (wchar_t**)MALLOC(sizeof(wchar_t*) * *_piCapturedStringCount);
1195 for (i = 0 ; i < *_piCapturedStringCount ; i++)
1196 {
1197 (*_pstCapturedString)[i] = to_wide_string(pstCaptured[i]);
1198 }
1199 freeArrayOfString(pstCaptured, *_piCapturedStringCount);
1200 }
1201
1202 FREE(pstTempStart);
1203 FREE(pstTempEnd);
1204 FREE(pwstTempStart);
1205 FREE(pwstTempEnd);
1206 }
1207 else
1208 {
1209 *_piStart = iStart;
1210 *_piEnd = iEnd;
1211 if (_piCapturedStringCount && *_piCapturedStringCount > 0)
1212 {
1213 /*free unused captured field*/
1214 freeArrayOfString(pstCaptured, *_piCapturedStringCount);
1215 }
1216 }
1217
1218 FREE(pstInput);
1219 FREE(pstPattern);
1220 return iPcreStatus;
1221 }
1222 /*-------------------------------------------------------------------------------*/
1223