1 /* -*- mode: C; mode: fold -*-
2 Copyright (C) 2010-2017,2018 John E. Davis
3
4 This file is part of the S-Lang Library.
5
6 The S-Lang Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
10
11 The S-Lang Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this library; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
19 USA.
20 */
21 #include "config.h"
22
23 #include <stdio.h>
24 #include <slang.h>
25 #include <string.h>
26 #include <pcre.h>
27
/* NOTE(review): SLANG_MODULE is provided by slang.h; presumably it declares
 * the module's init/deinit entry points -- confirm against the S-Lang headers.
 */
SLANG_MODULE(pcre);

/* Class id of PCRE_Type.  0 means the class has not been registered yet;
 * register_pcre_type stores the dynamically assigned id here.
 */
static int PCRE_Type_Id = 0;
31
/* State kept for one compiled pattern; instances are wrapped in an
 * SLang_MMT_Type by allocate_pcre_type and torn down by destroy_pcre.
 */
typedef struct
{
   pcre *p;                            /* compiled pattern from pcre_compile */
   pcre_extra *extra;                  /* result of pcre_study; may be NULL */
   int *ovector;                       /* offset buffer handed to pcre_exec */
   unsigned int ovector_len;           /* must be a multiple of 3 */
   unsigned int num_matches;           /* return value of the last successful
                                        * pcre_exec (>= 1); reset to 0 when the
                                        * last attempt did not match */
}
PCRE_Type;
41
/* Release a PCRE_Type wrapper together with its offset vector.  The
 * compiled pattern and study data (pt->p, pt->extra) are not touched here;
 * callers release those separately.  pt must not be NULL.
 */
static void free_pcre_type (PCRE_Type *pt)
{
   int *offsets = pt->ovector;

   if (NULL != offsets)
     SLfree ((char *) offsets);

   SLfree ((char *) pt);
}
49
allocate_pcre_type(pcre * p,pcre_extra * extra)50 static SLang_MMT_Type *allocate_pcre_type (pcre *p, pcre_extra *extra)
51 {
52 PCRE_Type *pt;
53 SLang_MMT_Type *mmt;
54 int ovector_len;
55
56 pt = (PCRE_Type *) SLmalloc (sizeof (PCRE_Type));
57 if (pt == NULL)
58 return NULL;
59 memset ((char *) pt, 0, sizeof (PCRE_Type));
60
61 pt->p = p;
62 pt->extra = extra;
63
64 if (0 != pcre_fullinfo (p, extra, PCRE_INFO_CAPTURECOUNT, &ovector_len))
65 {
66 free_pcre_type (pt);
67 SLang_verror (SL_INTRINSIC_ERROR, "pcre_fullinfo failed");
68 return NULL;
69 }
70
71 ovector_len += 1; /* allow for pattern matched */
72 ovector_len *= 3; /* required to be multiple of 3 */
73 if (NULL == (pt->ovector = (int *)SLmalloc (ovector_len * sizeof (int))))
74 {
75 free_pcre_type (pt);
76 return NULL;
77 }
78 pt->ovector_len = ovector_len;
79
80 if (NULL == (mmt = SLang_create_mmt (PCRE_Type_Id, (VOID_STAR) pt)))
81 {
82 free_pcre_type (pt);
83 return NULL;
84 }
85 return mmt;
86 }
87
_pcre_compile_1(char * pattern,int options)88 static int _pcre_compile_1 (char *pattern, int options)
89 {
90 pcre *p;
91 pcre_extra *extra;
92 SLCONST char *err;
93 int erroffset;
94 unsigned char *table;
95 SLang_MMT_Type *mmt;
96
97 table = NULL;
98 p = pcre_compile (pattern, options, &err, &erroffset, table);
99 if (NULL == p)
100 {
101 SLang_verror (SL_Parse_Error, "Error compiling pattern '%s' at offset %d: %s",
102 pattern, erroffset, err);
103 return -1;
104 }
105
106 extra = pcre_study (p, 0, &err);
107 /* apparantly, a NULL return is ok */
108 if (err != NULL)
109 {
110 SLang_verror (SL_INTRINSIC_ERROR, "pcre_study failed: %s", err);
111 pcre_free (p);
112 return -1;
113 }
114
115 if (NULL == (mmt = allocate_pcre_type (p, extra)))
116 {
117 pcre_free ((char *) p);
118 pcre_free ((char *) extra);
119 return -1;
120 }
121
122 if (-1 == SLang_push_mmt (mmt))
123 {
124 SLang_free_mmt (mmt);
125 return -1;
126 }
127 return 0;
128 }
129
_pcre_compile(void)130 static void _pcre_compile (void)
131 {
132 char *pattern;
133 int options = 0;
134
135 switch (SLang_Num_Function_Args)
136 {
137 case 2:
138 if (-1 == SLang_pop_integer (&options))
139 return;
140 /* drop */
141 case 1:
142 default:
143 if (-1 == SLang_pop_slstring (&pattern))
144 return;
145 }
146 (void) _pcre_compile_1 (pattern, options);
147 SLang_free_slstring (pattern);
148 }
149
/* Returns the number of matches (0 if the pattern did not match), or -1 upon error */
static int _pcre_exec_1 (PCRE_Type *pt, char *str, unsigned int len, int pos, int options)
{
   int status;

   pt->num_matches = 0;

   /* A start offset beyond the end of the subject cannot match.  The
    * unsigned comparison also rejects a negative pos.
    */
   if (len < (unsigned int) pos)
     return 0;

   status = pcre_exec (pt->p, pt->extra, str, len, pos,
                       options, pt->ovector, pt->ovector_len);

   if (status > 0)
     {
        pt->num_matches = (unsigned int) status;
        return status;
     }

   if (status == PCRE_ERROR_NOMATCH)
     return 0;

   /* Everything else, including a 0 return, is treated as an error */
   SLang_verror (SL_INTRINSIC_ERROR, "pcre_exec returned %d", status);
   return -1;
}
173
_pcre_exec(void)174 static int _pcre_exec (void)
175 {
176 PCRE_Type *p;
177 SLang_MMT_Type *mmt;
178 char *str;
179 SLang_BString_Type *bstr = NULL;
180 SLstrlen_Type len;
181 int pos = 0;
182 int options = 0;
183 int ret = -1;
184
185 switch (SLang_Num_Function_Args)
186 {
187 case 4:
188 if (-1 == SLang_pop_integer (&options))
189 return -1;
190 /* drop */
191 case 3:
192 /* drop */
193 if (-1 == SLang_pop_integer (&pos))
194 return -1;
195 /* drop */
196 default:
197 switch (SLang_peek_at_stack())
198 {
199 case SLANG_STRING_TYPE:
200 if (-1 == SLang_pop_slstring (&str))
201 return -1;
202 len = strlen (str);
203 break;
204
205 case SLANG_BSTRING_TYPE:
206 default:
207 if (-1 == SLang_pop_bstring(&bstr))
208 return -1;
209 str = (char *)SLbstring_get_pointer(bstr, &len);
210 if (str == NULL)
211 {
212 SLbstring_free (bstr);
213 return -1;
214 }
215 break;
216 }
217 }
218
219 if (NULL == (mmt = SLang_pop_mmt (PCRE_Type_Id)))
220 goto free_and_return;
221 p = (PCRE_Type *)SLang_object_from_mmt (mmt);
222
223 ret = _pcre_exec_1 (p, str, len, pos, options);
224
225 free_and_return:
226
227 SLang_free_mmt (mmt);
228 if (bstr != NULL)
229 SLbstring_free (bstr);
230 else
231 SLang_free_slstring (str);
232 return ret;
233 }
234
/* Fetch the offset pair recorded for the n-th substring of the most recent
 * match on pt.
 *
 * Upon success 0 is returned, with the pair stored in *a and *b.  -1 is
 * returned when n is not below pt->num_matches, or when the recorded pair
 * is negative or inverted; *a and *b are then left unmodified.
 */
static int get_nth_start_stop (PCRE_Type *pt, unsigned int n,
                               SLstrlen_Type *a, SLstrlen_Type *b)
{
   int *pair;

   if (pt->num_matches <= n)
     return -1;

   /* Each substring occupies two consecutive ints in the vector */
   pair = pt->ovector + 2*n;

   if ((pair[0] < 0) || (pair[1] < pair[0]))
     return -1;

   *a = (unsigned int) pair[0];
   *b = (unsigned int) pair[1];
   return 0;
}
252
/* Intrinsic pcre_nth_match: push the offset pair of the n-th substring of
 * the most recent match on pt as a 2-element integer array.  NULL is pushed
 * instead when get_nth_start_stop reports that there is no such substring.
 * If the array cannot be created nothing is pushed.
 */
static void _pcre_nth_match (PCRE_Type *pt, int *np)
{
   /* These must have the type of get_nth_start_stop's out-parameters,
    * since their addresses are passed to it.
    */
   SLstrlen_Type start, stop;
   SLang_Array_Type *at;
   SLindex_Type two = 2;
   int *data;

   if (-1 == get_nth_start_stop (pt, (unsigned int) *np, &start, &stop))
     {
        (void) SLang_push_null ();
        return;
     }

   if (NULL == (at = SLang_create_array (SLANG_INT_TYPE, 0, NULL, &two, 1)))
     return;

   data = (int *) at->data;
   data[0] = (int) start;
   data[1] = (int) stop;

   /* The second argument asks SLang_push_array to release our reference */
   (void) SLang_push_array (at, 1);
}
274
/* Intrinsic pcre_nth_substr: push the text of the n-th substring of the
 * most recent match on pt, extracted from str.  NULL is pushed instead when
 * there is no such substring or when its offsets do not fit within str.
 */
static void _pcre_nth_substr (PCRE_Type *pt, char *str, int *np)
{
   SLstrlen_Type first, last;
   SLstrlen_Type slen = strlen (str);
   char *sub;

   if (-1 == get_nth_start_stop (pt, (unsigned int) *np, &first, &last))
     {
        SLang_push_null ();
        return;
     }

   /* str need not be the subject that was matched, so re-check the bounds */
   if ((first > slen) || (last > slen))
     {
        SLang_push_null ();
        return;
     }

   sub = SLang_create_nslstring (str + first, last - first);
   (void) SLang_push_string (sub);
   SLang_free_slstring (sub);
}
293
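/* This function converts a slang RE to a pcre expression.  It performs the
 * following transformations:
 *    (     -->   \(
 *    )     -->   \)
 *    #     -->   \#
 *    |     -->   \|
 *    {     -->   \{
 *    }     -->   \}
 *   \<     -->   \b
 *   \>     -->   \b
 *   \C     -->   (?i)
 *   \c     -->   (?-i)
 *   \(     -->   (
 *   \)     -->   )
 *   \{     -->   {
 *   \}     -->   }
 * The unescaped characters in the first group are escaped only outside of a
 * [...] bracket expression; inside brackets they are copied unchanged.  Any
 * other backslash sequence is passed through as is, and a lone backslash at
 * the very end of the pattern is dropped.
 * Anything else?
 */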
_slang_to_pcre(char * slpattern)312 static char *_slang_to_pcre (char *slpattern)
313 {
314 char *pattern, *p, *s;
315 SLstrlen_Type len;
316 int in_bracket;
317 char ch;
318
319 len = strlen (slpattern);
320 pattern = (char *)SLmalloc (3*len + 1);
321 if (pattern == NULL)
322 return NULL;
323
324 p = pattern;
325 s = slpattern;
326 in_bracket = 0;
327 while ((ch = *s++) != 0)
328 {
329 switch (ch)
330 {
331 case '{':
332 case '}':
333 case '(':
334 case ')':
335 case '#':
336 case '|':
337 if (0 == in_bracket) *p++ = '\\';
338 *p++ = ch;
339 break;
340
341 case '[':
342 in_bracket = 1;
343 *p++ = ch;
344 break;
345
346 case ']':
347 in_bracket = 0;
348 *p++ = ch;
349 break;
350
351 case '\\':
352 ch = *s++;
353 switch (ch)
354 {
355 case 0:
356 s--;
357 break;
358
359 case '<':
360 case '>':
361 *p++ = '\\'; *p++ = 'b';
362 break;
363
364 case '(':
365 case ')':
366 case '{':
367 case '}':
368 *p++ = ch;
369 break;
370
371 case 'C':
372 *p++ = '('; *p++ = '?'; *p++ = 'i'; *p++ = ')';
373 break;
374 case 'c':
375 *p++ = '('; *p++ = '?'; *p++ = '-'; *p++ = 'i'; *p++ = ')';
376 break;
377
378 default:
379 *p++ = '\\';
380 *p++ = ch;
381 }
382 break;
383
384 default:
385 *p++ = ch;
386 break;
387 }
388 }
389 *p = 0;
390
391 s = SLang_create_slstring (pattern);
392 SLfree (pattern);
393 return s;
394 }
395
/* Intrinsic slang_to_pcre: push the pcre form of a slang RE */
static void slang_to_pcre (char *pattern)
{
   char *converted = _slang_to_pcre (pattern);

   /* A NULL result is ok for both of the calls below */
   (void) SLang_push_string (converted);
   SLang_free_slstring (converted);
}
403
/* Destroy callback registered for the PCRE_Type class: release the study
 * data, the compiled pattern, and finally the wrapper itself.
 */
static void destroy_pcre (SLtype type, VOID_STAR f)
{
   PCRE_Type *pt = (PCRE_Type *) f;

   (void) type;

   if (NULL != pt->extra)
     pcre_free ((char *) pt->extra);

   if (NULL != pt->p)
     pcre_free ((char *) pt->p);

   free_pcre_type (pt);
}
416
/* The class id of PCRE_Type is assigned dynamically, so the table below is
 * written using this placeholder.  register_pcre_type replaces it with the
 * real id via SLclass_patch_intrin_fun_table1.
 */
#define DUMMY_PCRE_TYPE ((SLtype)-1)
/* Short aliases for the argument/return types used in the table */
#define P DUMMY_PCRE_TYPE
#define I SLANG_INT_TYPE
#define V SLANG_VOID_TYPE
#define S SLANG_STRING_TYPE
/* Intrinsic functions exported by this module.  pcre_exec and pcre_compile
 * are declared with no fixed arguments because they pop a variable number
 * of arguments from the stack themselves.
 */
static SLang_Intrin_Fun_Type PCRE_Intrinsics [] =
{
   MAKE_INTRINSIC_0("pcre_exec", _pcre_exec, I),
   MAKE_INTRINSIC_0("pcre_compile", _pcre_compile, V),
   MAKE_INTRINSIC_2("pcre_nth_match", _pcre_nth_match, V, P, I),
   MAKE_INTRINSIC_3("pcre_nth_substr", _pcre_nth_substr, V, P, S, I),
   MAKE_INTRINSIC_1("slang_to_pcre", slang_to_pcre, V, S),
   SLANG_END_INTRIN_FUN_TABLE
};
431
432 static SLang_IConstant_Type PCRE_Consts [] =
433 {
434 /* compile options */
435 #ifndef PCRE_ANCHORED
436 # define PCRE_ANCHORED 0
437 #endif
438 MAKE_ICONSTANT("PCRE_ANCHORED", PCRE_ANCHORED),
439 #ifndef PCRE_AUTO_CALLOUT
440 # define PCRE_AUTO_CALLOUT 0
441 #endif
442 MAKE_ICONSTANT("PCRE_AUTO_CALLOUT", PCRE_AUTO_CALLOUT),
443 #ifndef PCRE_BSR_ANYCRLF
444 # define PCRE_BSR_ANYCRLF 0
445 #endif
446 MAKE_ICONSTANT("PCRE_BSR_ANYCRLF", PCRE_BSR_ANYCRLF),
447 #ifndef PCRE_BSR_UNICODE
448 # define PCRE_BSR_UNICODE 0
449 #endif
450 MAKE_ICONSTANT("PCRE_BSR_UNICODE", PCRE_BSR_UNICODE),
451 #ifndef PCRE_CASELESS
452 # define PCRE_CASELESS 0
453 #endif
454 MAKE_ICONSTANT("PCRE_CASELESS", PCRE_CASELESS),
455 #ifndef PCRE_DUPNAMES
456 # define PCRE_DUPNAMES 0
457 #endif
458 MAKE_ICONSTANT("PCRE_DUPNAMES", PCRE_DUPNAMES),
459 #ifndef PCRE_DOLLAR_ENDONLY
460 # define PCRE_DOLLAR_ENDONLY 0
461 #endif
462 MAKE_ICONSTANT("PCRE_DOLLAR_ENDONLY", PCRE_DOLLAR_ENDONLY),
463 #ifndef PCRE_DOTALL
464 # define PCRE_DOTALL 0
465 #endif
466 MAKE_ICONSTANT("PCRE_DOTALL", PCRE_DOTALL),
467 #ifndef PCRE_EXTENDED
468 # define PCRE_EXTENDED 0
469 #endif
470 MAKE_ICONSTANT("PCRE_EXTENDED", PCRE_EXTENDED),
471 #ifndef PCRE_EXTRA
472 # define PCRE_EXTRA 0
473 #endif
474 MAKE_ICONSTANT("PCRE_EXTRA", PCRE_EXTRA),
475 #ifndef PCRE_FIRSTLINE
476 # define PCRE_FIRSTLINE 0
477 #endif
478 MAKE_ICONSTANT("PCRE_FIRSTLINE", PCRE_FIRSTLINE),
479 #ifndef PCRE_JAVASCRIPT_COMPAT
480 # define PCRE_JAVASCRIPT_COMPAT 0
481 #endif
482 MAKE_ICONSTANT("PCRE_JAVASCRIPT_COMPAT", PCRE_JAVASCRIPT_COMPAT),
483 #ifndef PCRE_MULTILINE
484 # define PCRE_MULTILINE 0
485 #endif
486 MAKE_ICONSTANT("PCRE_MULTILINE", PCRE_MULTILINE),
487 #ifndef PCRE_NEVER_UTF
488 # define PCRE_NEVER_UTF 0
489 #endif
490 MAKE_ICONSTANT("PCRE_NEVER_UTF", PCRE_NEVER_UTF),
491 #ifndef PCRE_NEWLINE_ANY
492 # define PCRE_NEWLINE_ANY 0
493 #endif
494 MAKE_ICONSTANT("PCRE_NEWLINE_ANY", PCRE_NEWLINE_ANY),
495 #ifndef PCRE_NEWLINE_ANYCRLF
496 # define PCRE_NEWLINE_ANYCRLF 0
497 #endif
498 MAKE_ICONSTANT("PCRE_NEWLINE_ANYCRLF", PCRE_NEWLINE_ANYCRLF),
499 #ifndef PCRE_NEWLINE_CR
500 # define PCRE_NEWLINE_CR 0
501 #endif
502 MAKE_ICONSTANT("PCRE_NEWLINE_CR", PCRE_NEWLINE_CR),
503 #ifndef PCRE_NEWLINE_CRLF
504 # define PCRE_NEWLINE_CRLF 0
505 #endif
506 MAKE_ICONSTANT("PCRE_NEWLINE_CRLF", PCRE_NEWLINE_CRLF),
507 #ifndef PCRE_NEWLINE_LF
508 # define PCRE_NEWLINE_LF 0
509 #endif
510 MAKE_ICONSTANT("PCRE_NEWLINE_LF", PCRE_NEWLINE_LF),
511 #ifndef PCRE_NO_START_OPTIMIZE
512 # define PCRE_NO_START_OPTIMIZE 0
513 #endif
514 MAKE_ICONSTANT("PCRE_NO_START_OPTIMIZE", PCRE_NO_START_OPTIMIZE),
515 #ifndef PCRE_NOTEMPTY
516 # define PCRE_NOTEMPTY 0
517 #endif
518 MAKE_ICONSTANT("PCRE_NOTEMPTY", PCRE_NOTEMPTY),
519 #ifndef PCRE_NO_AUTO_CAPTURE
520 # define PCRE_NO_AUTO_CAPTURE 0
521 #endif
522 MAKE_ICONSTANT("PCRE_NO_AUTO_CAPTURE", PCRE_NO_AUTO_CAPTURE),
523 #ifndef PCRE_NO_AUTO_POSSESS
524 # define PCRE_NO_AUTO_POSSESS 0
525 #endif
526 MAKE_ICONSTANT("PCRE_NO_AUTO_POSSESS", PCRE_NO_AUTO_POSSESS),
527 #ifndef PCRE_NO_UTF8_CHECK
528 # define PCRE_NO_UTF8_CHECK 0
529 #endif
530 MAKE_ICONSTANT("PCRE_NO_UTF8_CHECK", PCRE_NO_UTF8_CHECK),
531 #ifndef PCRE_UCP
532 # define PCRE_UCP 0
533 #endif
534 MAKE_ICONSTANT("PCRE_UCP", PCRE_UCP),
535 #ifndef PCRE_UNGREEDY
536 # define PCRE_UNGREEDY 0
537 #endif
538 MAKE_ICONSTANT("PCRE_UNGREEDY", PCRE_UNGREEDY),
539 #ifndef PCRE_UTF8
540 # define PCRE_UTF8 0
541 #endif
542 MAKE_ICONSTANT("PCRE_UTF8", PCRE_UTF8),
543
544 /* exec options */
545 #ifndef PCRE_NOTBOL
546 # define PCRE_NOTBOL 0
547 #endif
548 MAKE_ICONSTANT("PCRE_NOTBOL", PCRE_NOTBOL),
549 #ifndef PCRE_NOTEOL
550 # define PCRE_NOTEOL 0
551 #endif
552 MAKE_ICONSTANT("PCRE_NOTEOL", PCRE_NOTEOL),
553 #ifndef PCRE_NOTEMPTY
554 # define PCRE_NOTEMPTY 0
555 #endif
556 MAKE_ICONSTANT("PCRE_NOTEMPTY", PCRE_NOTEMPTY),
557 #ifndef PCRE_PARTIAL_SOFT
558 # define PCRE_PARTIAL_SOFT 0
559 #endif
560 MAKE_ICONSTANT("PCRE_PARTIAL_SOFT", PCRE_PARTIAL_SOFT),
561 #ifndef PCRE_DFA_SHORTEST
562 # define PCRE_DFA_SHORTEST 0
563 #endif
564 MAKE_ICONSTANT("PCRE_DFA_SHORTEST", PCRE_DFA_SHORTEST),
565 #ifndef PCRE_DFA_RESTART
566 # define PCRE_DFA_RESTART 0
567 #endif
568 MAKE_ICONSTANT("PCRE_DFA_RESTART", PCRE_DFA_RESTART),
569 #ifndef PCRE_PARTIAL_HARD
570 # define PCRE_PARTIAL_HARD 0
571 #endif
572 MAKE_ICONSTANT("PCRE_PARTIAL_HARD", PCRE_PARTIAL_HARD),
573 #ifndef PCRE_NOTEMPTY_ATSTART
574 # define PCRE_NOTEMPTY_ATSTART 0
575 #endif
576 MAKE_ICONSTANT("PCRE_NOTEMPTY_ATSTART", PCRE_NOTEMPTY_ATSTART),
577
578 SLANG_END_ICONST_TABLE
579 };
580
581 #undef P
582 #undef I
583 #undef V
584 #undef S
585
register_pcre_type(void)586 static int register_pcre_type (void)
587 {
588 SLang_Class_Type *cl;
589
590 if (PCRE_Type_Id != 0)
591 return 0;
592
593 if (NULL == (cl = SLclass_allocate_class ("PCRE_Type")))
594 return -1;
595
596 if (-1 == SLclass_set_destroy_function (cl, destroy_pcre))
597 return -1;
598
599 /* By registering as SLANG_VOID_TYPE, slang will dynamically allocate a
600 * type.
601 */
602 if (-1 == SLclass_register_class (cl, SLANG_VOID_TYPE, sizeof (PCRE_Type), SLANG_CLASS_TYPE_MMT))
603 return -1;
604
605 PCRE_Type_Id = SLclass_get_class_id (cl);
606 if (-1 == SLclass_patch_intrin_fun_table1 (PCRE_Intrinsics, DUMMY_PCRE_TYPE, PCRE_Type_Id))
607 return -1;
608
609 return 0;
610 }
611
/* Allocation hook installed as pcre_malloc; it forwards to SLmalloc */
static void *do_malloc (size_t n)
{
   void *mem = (void *) SLmalloc (n);

   return mem;
}
616
/* Deallocation hook installed as pcre_free; it forwards to SLfree */
static void do_free (void *x)
{
   char *mem = (char *) x;

   SLfree (mem);
}
621
init_pcre_module_ns(char * ns_name)622 int init_pcre_module_ns (char *ns_name)
623 {
624 SLang_NameSpace_Type *ns = SLns_create_namespace (ns_name);
625 if (ns == NULL)
626 return -1;
627
628 if (-1 == register_pcre_type ())
629 return -1;
630
631 pcre_malloc = do_malloc;
632 pcre_free = do_free;
633
634 if ((-1 == SLns_add_intrin_fun_table (ns, PCRE_Intrinsics, "__PCRE__"))
635 || (-1 == SLns_add_iconstant_table (ns, PCRE_Consts, NULL)))
636 return -1;
637
638 return 0;
639 }
640
641 /* This function is optional */
void deinit_pcre_module (void)
{
   /* Intentionally empty */
}
645
646