1 /* -*- mode: C; mode: fold -*-
2 Copyright (C) 2010-2017,2018 John E. Davis
3 
4 This file is part of the S-Lang Library.
5 
6 The S-Lang Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
10 
11 The S-Lang Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with this library; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
19 USA.
20 */
21 #include "config.h"
22 
23 #include <stdio.h>
24 #include <slang.h>
25 #include <string.h>
26 #include <pcre.h>
27 
/* S-Lang module declaration for the "pcre" module (macro from slang.h). */
SLANG_MODULE(pcre);

/* Class id of PCRE_Type.  It stays 0 until register_pcre_type() stores the
 * id obtained from SLclass_get_class_id().
 */
static int PCRE_Type_Id = 0;
31 
32 typedef struct
33 {
34    pcre *p;
35    pcre_extra *extra;
36    int *ovector;
37    unsigned int ovector_len;	       /* must be a multiple of 3 */
38    unsigned int num_matches;	       /* return value of pcre_exec (>= 1)*/
39 }
40 PCRE_Type;
41 
/* Release a PCRE_Type container together with its offset vector.  The
 * compiled pattern (pt->p) and study data (pt->extra) are NOT freed here.
 */
static void free_pcre_type (PCRE_Type *pt)
{
   int *offsets = pt->ovector;

   if (NULL != offsets)
     SLfree ((char *) offsets);

   SLfree ((char *) pt);
}
49 
allocate_pcre_type(pcre * p,pcre_extra * extra)50 static SLang_MMT_Type *allocate_pcre_type (pcre *p, pcre_extra *extra)
51 {
52    PCRE_Type *pt;
53    SLang_MMT_Type *mmt;
54    int ovector_len;
55 
56    pt = (PCRE_Type *) SLmalloc (sizeof (PCRE_Type));
57    if (pt == NULL)
58      return NULL;
59    memset ((char *) pt, 0, sizeof (PCRE_Type));
60 
61    pt->p = p;
62    pt->extra = extra;
63 
64    if (0 != pcre_fullinfo (p, extra, PCRE_INFO_CAPTURECOUNT, &ovector_len))
65      {
66 	free_pcre_type (pt);
67 	SLang_verror (SL_INTRINSIC_ERROR, "pcre_fullinfo failed");
68 	return NULL;
69      }
70 
71    ovector_len += 1;		       /* allow for pattern matched */
72    ovector_len *= 3;		       /* required to be multiple of 3 */
73    if (NULL == (pt->ovector = (int *)SLmalloc (ovector_len * sizeof (int))))
74      {
75 	free_pcre_type (pt);
76 	return NULL;
77      }
78    pt->ovector_len = ovector_len;
79 
80    if (NULL == (mmt = SLang_create_mmt (PCRE_Type_Id, (VOID_STAR) pt)))
81      {
82 	free_pcre_type (pt);
83 	return NULL;
84      }
85    return mmt;
86 }
87 
_pcre_compile_1(char * pattern,int options)88 static int _pcre_compile_1 (char *pattern, int options)
89 {
90    pcre *p;
91    pcre_extra *extra;
92    SLCONST char *err;
93    int erroffset;
94    unsigned char *table;
95    SLang_MMT_Type *mmt;
96 
97    table = NULL;
98    p = pcre_compile (pattern, options, &err, &erroffset, table);
99    if (NULL == p)
100      {
101 	SLang_verror (SL_Parse_Error, "Error compiling pattern '%s' at offset %d: %s",
102 		      pattern, erroffset, err);
103 	return -1;
104      }
105 
106    extra = pcre_study (p, 0, &err);
107    /* apparantly, a NULL return is ok */
108    if (err != NULL)
109      {
110 	SLang_verror (SL_INTRINSIC_ERROR, "pcre_study failed: %s", err);
111 	pcre_free (p);
112 	return -1;
113      }
114 
115    if (NULL == (mmt = allocate_pcre_type (p, extra)))
116      {
117 	pcre_free ((char *) p);
118 	pcre_free ((char *) extra);
119 	return -1;
120      }
121 
122    if (-1 == SLang_push_mmt (mmt))
123      {
124 	SLang_free_mmt (mmt);
125 	return -1;
126      }
127    return 0;
128 }
129 
_pcre_compile(void)130 static void _pcre_compile (void)
131 {
132    char *pattern;
133    int options = 0;
134 
135    switch (SLang_Num_Function_Args)
136      {
137       case 2:
138 	if (-1 == SLang_pop_integer (&options))
139 	  return;
140 	/* drop */
141       case 1:
142       default:
143 	if (-1 == SLang_pop_slstring (&pattern))
144 	  return;
145      }
146    (void) _pcre_compile_1 (pattern, options);
147    SLang_free_slstring (pattern);
148 }
149 
150 /* returns number of matches */
_pcre_exec_1(PCRE_Type * pt,char * str,unsigned int len,int pos,int options)151 static int _pcre_exec_1 (PCRE_Type *pt, char *str, unsigned int len, int pos, int options)
152 {
153    int rc;
154 
155    pt->num_matches = 0;
156    if ((unsigned int) pos > len)
157      return 0;
158 
159    rc = pcre_exec (pt->p, pt->extra, str, len, pos,
160 		   options, pt->ovector, pt->ovector_len);
161 
162    if (rc == PCRE_ERROR_NOMATCH)
163      return 0;
164 
165    if (rc <= 0)
166      {
167 	SLang_verror (SL_INTRINSIC_ERROR, "pcre_exec returned %d", rc);
168 	return -1;
169      }
170    pt->num_matches = (unsigned int) rc;
171    return rc;
172 }
173 
_pcre_exec(void)174 static int _pcre_exec (void)
175 {
176    PCRE_Type *p;
177    SLang_MMT_Type *mmt;
178    char *str;
179    SLang_BString_Type *bstr = NULL;
180    SLstrlen_Type len;
181    int pos = 0;
182    int options = 0;
183    int ret = -1;
184 
185    switch (SLang_Num_Function_Args)
186      {
187       case 4:
188 	if (-1 == SLang_pop_integer (&options))
189 	  return -1;
190 	/* drop */
191       case 3:
192 	/* drop */
193 	if (-1 == SLang_pop_integer (&pos))
194 	  return -1;
195 	/* drop */
196       default:
197 	switch (SLang_peek_at_stack())
198 	  {
199 	   case SLANG_STRING_TYPE:
200 	     if (-1 == SLang_pop_slstring (&str))
201 	       return -1;
202 	     len = strlen (str);
203 	     break;
204 
205 	   case SLANG_BSTRING_TYPE:
206 	   default:
207 	     if (-1 == SLang_pop_bstring(&bstr))
208 	       return -1;
209 	     str = (char *)SLbstring_get_pointer(bstr, &len);
210 	     if (str == NULL)
211 	       {
212 		  SLbstring_free (bstr);
213 		  return -1;
214 	       }
215 	     break;
216 	  }
217      }
218 
219    if (NULL == (mmt = SLang_pop_mmt (PCRE_Type_Id)))
220      goto free_and_return;
221    p = (PCRE_Type *)SLang_object_from_mmt (mmt);
222 
223    ret = _pcre_exec_1 (p, str, len, pos, options);
224 
225 free_and_return:
226 
227    SLang_free_mmt (mmt);
228    if (bstr != NULL)
229      SLbstring_free (bstr);
230    else
231      SLang_free_slstring (str);
232    return ret;
233 }
234 
/* Fetch the [start, stop) offsets of the n-th match recorded by the last
 * pcre_exec (n == 0 is the first pair in the offset vector).
 *
 * Returns 0 and fills *a and *b upon success.  Returns -1, leaving *a and
 * *b untouched, if n is not less than pt->num_matches or the stored pair
 * is not a valid range (start negative, or stop before start).
 */
static int get_nth_start_stop (PCRE_Type *pt, unsigned int n,
                               SLstrlen_Type *a, SLstrlen_Type *b)
{
   const int *pair;

   if (pt->num_matches <= n)
     return -1;

   pair = pt->ovector + 2*n;
   if ((pair[0] < 0) || (pair[1] < pair[0]))
     return -1;

   *a = (unsigned int) pair[0];
   *b = (unsigned int) pair[1];
   return 0;
}
252 
/* Intrinsic: pcre_nth_match (pcre, n).
 *
 * Pushes a 2-element integer array {start, stop} holding the offsets of the
 * n-th match of the most recent pcre_exec, or NULL when get_nth_start_stop
 * rejects n.  Nothing is pushed if the array cannot be created.
 */
static void _pcre_nth_match (PCRE_Type *pt, int *np)
{
   /* These must be SLstrlen_Type: their addresses are passed to
    * get_nth_start_stop, which takes SLstrlen_Type pointers.
    */
   SLstrlen_Type start, stop;
   SLang_Array_Type *at;
   SLindex_Type two = 2;
   int *data;

   /* a negative *np turns into a huge unsigned value and is rejected */
   if (-1 == get_nth_start_stop (pt, (unsigned int) *np, &start, &stop))
     {
        SLang_push_null ();
        return;
     }

   if (NULL == (at = SLang_create_array (SLANG_INT_TYPE, 0, NULL, &two, 1)))
     return;

   data = (int *)at->data;
   data[0] = (int)start;
   data[1] = (int)stop;
   (void) SLang_push_array (at, 1);
}
274 
/* Intrinsic: pcre_nth_substr (pcre, str, n).
 *
 * Pushes the substring of str covered by the n-th match of the most recent
 * pcre_exec.  NULL is pushed when get_nth_start_stop rejects n, or when
 * either offset lies beyond strlen (str).
 */
static void _pcre_nth_substr (PCRE_Type *pt, char *str, int *np)
{
   SLstrlen_Type first, last;
   SLstrlen_Type nbytes = strlen (str);
   char *sub;

   if (-1 == get_nth_start_stop (pt, (unsigned int) *np, &first, &last))
     {
        SLang_push_null ();
        return;
     }

   /* the offsets came from an earlier exec, possibly on another string */
   if ((first > nbytes) || (last > nbytes))
     {
        SLang_push_null ();
        return;
     }

   sub = SLang_create_nslstring (str + first, last - first);
   (void) SLang_push_string (sub);
   SLang_free_slstring (sub);
}
293 
294 /* This function converts a slang RE to a pcre expression.  It performs the
295  * following transformations:
296  *    (     -->   \(
297  *    )     -->   \)
298  *    #     -->   \#
299  *    |     -->   \|
300  *    {     -->   \{
301  *    }     -->   \}
302  *   \<     -->   \b
303  *   \>     -->   \b
304  *   \C     -->   (?i)
305  *   \c     -->   (?-i)
306  *   \(     -->   (
307  *   \)     -->   )
308  *   \{     -->   {
309  *   \}     -->   }
310  * Anything else?
311  */
_slang_to_pcre(char * slpattern)312 static char *_slang_to_pcre (char *slpattern)
313 {
314    char *pattern, *p, *s;
315    SLstrlen_Type len;
316    int in_bracket;
317    char ch;
318 
319    len = strlen (slpattern);
320    pattern = (char *)SLmalloc (3*len + 1);
321    if (pattern == NULL)
322      return NULL;
323 
324    p = pattern;
325    s = slpattern;
326    in_bracket = 0;
327    while ((ch = *s++) != 0)
328      {
329 	switch (ch)
330 	  {
331 	   case '{':
332 	   case '}':
333 	   case '(':
334 	   case ')':
335 	   case '#':
336 	   case '|':
337 	     if (0 == in_bracket) *p++ = '\\';
338 	     *p++ = ch;
339 	     break;
340 
341 	   case '[':
342 	     in_bracket = 1;
343 	     *p++ = ch;
344 	     break;
345 
346 	   case ']':
347 	     in_bracket = 0;
348 	     *p++ = ch;
349 	     break;
350 
351 	   case '\\':
352 	     ch = *s++;
353 	     switch (ch)
354 	       {
355 		case 0:
356 		  s--;
357 		  break;
358 
359 		case '<':
360 		case '>':
361 		  *p++ = '\\'; *p++ = 'b';
362 		  break;
363 
364 		case '(':
365 		case ')':
366 		case '{':
367 		case '}':
368 		  *p++ = ch;
369 		  break;
370 
371 		case 'C':
372 		  *p++ = '('; *p++ = '?'; *p++ = 'i'; *p++ = ')';
373 		  break;
374 		case 'c':
375 		  *p++ = '('; *p++ = '?'; *p++ = '-'; *p++ = 'i'; *p++ = ')';
376 		  break;
377 
378 		default:
379 		  *p++ = '\\';
380 		  *p++ = ch;
381 	       }
382 	     break;
383 
384 	   default:
385 	     *p++ = ch;
386 	     break;
387 	  }
388      }
389    *p = 0;
390 
391    s = SLang_create_slstring (pattern);
392    SLfree (pattern);
393    return s;
394 }
395 
/* Intrinsic: slang_to_pcre (pattern).
 * Pushes the PCRE form of a S-Lang regular expression.
 */
static void slang_to_pcre (char *pattern)
{
   char *converted = _slang_to_pcre (pattern);

   /* converted may be NULL; the two calls below are expected to accept that */
   (void) SLang_push_string (converted);
   SLang_free_slstring (converted);
}
403 
/* Destroy callback installed for the PCRE_Type class by
 * register_pcre_type().  Frees the study data, the compiled pattern, and
 * then the container itself.
 */
static void destroy_pcre (SLtype type, VOID_STAR f)
{
   PCRE_Type *pt = (PCRE_Type *) f;

   (void) type;

   if (NULL != pt->extra)
     pcre_free ((char *) pt->extra);

   if (NULL != pt->p)
     pcre_free ((char *) pt->p);

   free_pcre_type (pt);
}
416 
417 #define DUMMY_PCRE_TYPE ((SLtype)-1)
418 #define P DUMMY_PCRE_TYPE
419 #define I SLANG_INT_TYPE
420 #define V SLANG_VOID_TYPE
421 #define S SLANG_STRING_TYPE
422 static SLang_Intrin_Fun_Type PCRE_Intrinsics [] =
423 {
424    MAKE_INTRINSIC_0("pcre_exec", _pcre_exec, I),
425    MAKE_INTRINSIC_0("pcre_compile", _pcre_compile, V),
426    MAKE_INTRINSIC_2("pcre_nth_match", _pcre_nth_match, V, P, I),
427    MAKE_INTRINSIC_3("pcre_nth_substr", _pcre_nth_substr, V, P, S, I),
428    MAKE_INTRINSIC_1("slang_to_pcre", slang_to_pcre, V, S),
429    SLANG_END_INTRIN_FUN_TABLE
430 };
431 
432 static SLang_IConstant_Type PCRE_Consts [] =
433 {
434    /* compile options */
435 #ifndef PCRE_ANCHORED
436 # define PCRE_ANCHORED 0
437 #endif
438    MAKE_ICONSTANT("PCRE_ANCHORED", PCRE_ANCHORED),
439 #ifndef PCRE_AUTO_CALLOUT
440 # define PCRE_AUTO_CALLOUT 0
441 #endif
442    MAKE_ICONSTANT("PCRE_AUTO_CALLOUT", PCRE_AUTO_CALLOUT),
443 #ifndef PCRE_BSR_ANYCRLF
444 # define PCRE_BSR_ANYCRLF 0
445 #endif
446    MAKE_ICONSTANT("PCRE_BSR_ANYCRLF", PCRE_BSR_ANYCRLF),
447 #ifndef PCRE_BSR_UNICODE
448 # define PCRE_BSR_UNICODE 0
449 #endif
450    MAKE_ICONSTANT("PCRE_BSR_UNICODE", PCRE_BSR_UNICODE),
451 #ifndef PCRE_CASELESS
452 # define PCRE_CASELESS 0
453 #endif
454    MAKE_ICONSTANT("PCRE_CASELESS", PCRE_CASELESS),
455 #ifndef PCRE_DUPNAMES
456 # define PCRE_DUPNAMES 0
457 #endif
458    MAKE_ICONSTANT("PCRE_DUPNAMES", PCRE_DUPNAMES),
459 #ifndef PCRE_DOLLAR_ENDONLY
460 # define PCRE_DOLLAR_ENDONLY 0
461 #endif
462    MAKE_ICONSTANT("PCRE_DOLLAR_ENDONLY", PCRE_DOLLAR_ENDONLY),
463 #ifndef PCRE_DOTALL
464 # define PCRE_DOTALL 0
465 #endif
466    MAKE_ICONSTANT("PCRE_DOTALL", PCRE_DOTALL),
467 #ifndef PCRE_EXTENDED
468 # define PCRE_EXTENDED 0
469 #endif
470    MAKE_ICONSTANT("PCRE_EXTENDED", PCRE_EXTENDED),
471 #ifndef PCRE_EXTRA
472 # define PCRE_EXTRA 0
473 #endif
474    MAKE_ICONSTANT("PCRE_EXTRA", PCRE_EXTRA),
475 #ifndef PCRE_FIRSTLINE
476 # define PCRE_FIRSTLINE 0
477 #endif
478    MAKE_ICONSTANT("PCRE_FIRSTLINE", PCRE_FIRSTLINE),
479 #ifndef PCRE_JAVASCRIPT_COMPAT
480 # define PCRE_JAVASCRIPT_COMPAT 0
481 #endif
482    MAKE_ICONSTANT("PCRE_JAVASCRIPT_COMPAT", PCRE_JAVASCRIPT_COMPAT),
483 #ifndef PCRE_MULTILINE
484 # define PCRE_MULTILINE 0
485 #endif
486    MAKE_ICONSTANT("PCRE_MULTILINE", PCRE_MULTILINE),
487 #ifndef PCRE_NEVER_UTF
488 # define PCRE_NEVER_UTF 0
489 #endif
490    MAKE_ICONSTANT("PCRE_NEVER_UTF", PCRE_NEVER_UTF),
491 #ifndef PCRE_NEWLINE_ANY
492 # define PCRE_NEWLINE_ANY 0
493 #endif
494    MAKE_ICONSTANT("PCRE_NEWLINE_ANY", PCRE_NEWLINE_ANY),
495 #ifndef PCRE_NEWLINE_ANYCRLF
496 # define PCRE_NEWLINE_ANYCRLF 0
497 #endif
498    MAKE_ICONSTANT("PCRE_NEWLINE_ANYCRLF", PCRE_NEWLINE_ANYCRLF),
499 #ifndef PCRE_NEWLINE_CR
500 # define PCRE_NEWLINE_CR 0
501 #endif
502    MAKE_ICONSTANT("PCRE_NEWLINE_CR", PCRE_NEWLINE_CR),
503 #ifndef PCRE_NEWLINE_CRLF
504 # define PCRE_NEWLINE_CRLF 0
505 #endif
506    MAKE_ICONSTANT("PCRE_NEWLINE_CRLF", PCRE_NEWLINE_CRLF),
507 #ifndef PCRE_NEWLINE_LF
508 # define PCRE_NEWLINE_LF 0
509 #endif
510    MAKE_ICONSTANT("PCRE_NEWLINE_LF", PCRE_NEWLINE_LF),
511 #ifndef PCRE_NO_START_OPTIMIZE
512 # define PCRE_NO_START_OPTIMIZE 0
513 #endif
514    MAKE_ICONSTANT("PCRE_NO_START_OPTIMIZE", PCRE_NO_START_OPTIMIZE),
515 #ifndef PCRE_NOTEMPTY
516 # define PCRE_NOTEMPTY 0
517 #endif
518    MAKE_ICONSTANT("PCRE_NOTEMPTY", PCRE_NOTEMPTY),
519 #ifndef PCRE_NO_AUTO_CAPTURE
520 # define PCRE_NO_AUTO_CAPTURE 0
521 #endif
522    MAKE_ICONSTANT("PCRE_NO_AUTO_CAPTURE", PCRE_NO_AUTO_CAPTURE),
523 #ifndef PCRE_NO_AUTO_POSSESS
524 # define PCRE_NO_AUTO_POSSESS 0
525 #endif
526    MAKE_ICONSTANT("PCRE_NO_AUTO_POSSESS", PCRE_NO_AUTO_POSSESS),
527 #ifndef PCRE_NO_UTF8_CHECK
528 # define PCRE_NO_UTF8_CHECK 0
529 #endif
530    MAKE_ICONSTANT("PCRE_NO_UTF8_CHECK", PCRE_NO_UTF8_CHECK),
531 #ifndef PCRE_UCP
532 # define PCRE_UCP 0
533 #endif
534    MAKE_ICONSTANT("PCRE_UCP", PCRE_UCP),
535 #ifndef PCRE_UNGREEDY
536 # define PCRE_UNGREEDY 0
537 #endif
538    MAKE_ICONSTANT("PCRE_UNGREEDY", PCRE_UNGREEDY),
539 #ifndef PCRE_UTF8
540 # define PCRE_UTF8 0
541 #endif
542    MAKE_ICONSTANT("PCRE_UTF8", PCRE_UTF8),
543 
544    /* exec options */
545 #ifndef PCRE_NOTBOL
546 # define PCRE_NOTBOL 0
547 #endif
548    MAKE_ICONSTANT("PCRE_NOTBOL", PCRE_NOTBOL),
549 #ifndef PCRE_NOTEOL
550 # define PCRE_NOTEOL 0
551 #endif
552    MAKE_ICONSTANT("PCRE_NOTEOL", PCRE_NOTEOL),
553 #ifndef PCRE_NOTEMPTY
554 # define PCRE_NOTEMPTY 0
555 #endif
556    MAKE_ICONSTANT("PCRE_NOTEMPTY", PCRE_NOTEMPTY),
557 #ifndef PCRE_PARTIAL_SOFT
558 # define PCRE_PARTIAL_SOFT 0
559 #endif
560    MAKE_ICONSTANT("PCRE_PARTIAL_SOFT", PCRE_PARTIAL_SOFT),
561 #ifndef PCRE_DFA_SHORTEST
562 # define PCRE_DFA_SHORTEST 0
563 #endif
564    MAKE_ICONSTANT("PCRE_DFA_SHORTEST", PCRE_DFA_SHORTEST),
565 #ifndef PCRE_DFA_RESTART
566 # define PCRE_DFA_RESTART 0
567 #endif
568    MAKE_ICONSTANT("PCRE_DFA_RESTART", PCRE_DFA_RESTART),
569 #ifndef PCRE_PARTIAL_HARD
570 # define PCRE_PARTIAL_HARD 0
571 #endif
572    MAKE_ICONSTANT("PCRE_PARTIAL_HARD", PCRE_PARTIAL_HARD),
573 #ifndef PCRE_NOTEMPTY_ATSTART
574 # define PCRE_NOTEMPTY_ATSTART 0
575 #endif
576    MAKE_ICONSTANT("PCRE_NOTEMPTY_ATSTART", PCRE_NOTEMPTY_ATSTART),
577 
578    SLANG_END_ICONST_TABLE
579 };
580 
581 #undef P
582 #undef I
583 #undef V
584 #undef S
585 
register_pcre_type(void)586 static int register_pcre_type (void)
587 {
588    SLang_Class_Type *cl;
589 
590    if (PCRE_Type_Id != 0)
591      return 0;
592 
593    if (NULL == (cl = SLclass_allocate_class ("PCRE_Type")))
594      return -1;
595 
596    if (-1 == SLclass_set_destroy_function (cl, destroy_pcre))
597      return -1;
598 
599    /* By registering as SLANG_VOID_TYPE, slang will dynamically allocate a
600     * type.
601     */
602    if (-1 == SLclass_register_class (cl, SLANG_VOID_TYPE, sizeof (PCRE_Type), SLANG_CLASS_TYPE_MMT))
603      return -1;
604 
605    PCRE_Type_Id = SLclass_get_class_id (cl);
606    if (-1 == SLclass_patch_intrin_fun_table1 (PCRE_Intrinsics, DUMMY_PCRE_TYPE, PCRE_Type_Id))
607      return -1;
608 
609    return 0;
610 }
611 
/* Allocation hook installed as pcre_malloc by init_pcre_module_ns(); it
 * forwards to SLmalloc.
 */
static void *do_malloc (size_t n)
{
   void *mem = (void *) SLmalloc (n);
   return mem;
}
616 
/* Deallocation hook installed as pcre_free by init_pcre_module_ns(); it
 * forwards to SLfree.
 */
static void do_free (void *x)
{
   char *mem = (char *) x;
   SLfree (mem);
}
621 
init_pcre_module_ns(char * ns_name)622 int init_pcre_module_ns (char *ns_name)
623 {
624    SLang_NameSpace_Type *ns = SLns_create_namespace (ns_name);
625    if (ns == NULL)
626      return -1;
627 
628    if (-1 == register_pcre_type ())
629      return -1;
630 
631    pcre_malloc = do_malloc;
632    pcre_free = do_free;
633 
634    if ((-1 == SLns_add_intrin_fun_table (ns, PCRE_Intrinsics, "__PCRE__"))
635        || (-1 == SLns_add_iconstant_table (ns, PCRE_Consts, NULL)))
636      return -1;
637 
638    return 0;
639 }
640 
/* Optional module hook; this module has nothing to do here. */
void deinit_pcre_module (void)
{
   return;
}
645 
646