1 /*
2  * CLISP interface to GNU regex
3  * originally by Bruno Haible 1995-04-14
4  * rewritten by Sam Steingold 2003-08-06
5  */
6 
7 #include "clisp.h"
8 #include "config.h"
9 #include <sys/types.h>          /* regex.h needs this */
10 #include <stdlib.h>             /* declare malloc(), free() */
11 #include <stdio.h>              /* BUFSIZ */
12 #include <regex.h>
13 
14 #ifndef FOREIGN
15 #error FOREIGN is not defined.
16 #error REGEXP needs a CLISP built with the foreign pointer datatype support.
17 #error Go into the main CLISP makefile and add a -DFOREIGN=void*
18 #error to CFLAGS make variable and rebuild CLISP before coming back here.
19 #endif
20 
21 DEFMODULE(regexp,"REGEXP")
22 
DEFFLAGSET(regexp_compile_flags,REG_EXTENDED REG_ICASE REG_NEWLINE REG_NOSUB)23 DEFFLAGSET(regexp_compile_flags, REG_EXTENDED REG_ICASE REG_NEWLINE REG_NOSUB)
24 DEFUN(REGEXP::REGEXP-COMPILE, pattern &key EXTENDED IGNORE-CASE NEWLINE NOSUB)
25 { /* compile the pattern into a regular expression */
26   int cflags = regexp_compile_flags();
27   object pattern = check_string(popSTACK());
28   int status;
29   regex_t* re;
30  restart_regcomp:
31   re = (regex_t*)clisp_malloc(sizeof(regex_t));
32   with_string_0(pattern,Symbol_value(S(utf_8)),patternz, {
33     begin_system_call();
34     status = regcomp(re,patternz,cflags);
35     end_system_call();
36   });
37   if (status) {
38     char buf[BUFSIZ];
39     begin_system_call();
40     regerror(status,re,buf,BUFSIZ);
41     free(re);
42     end_system_call();
43     pushSTACK(NIL); /* no PLACE */
44     pushSTACK(NIL); pushSTACK(pattern);
45     STACK_1 = asciz_to_string(buf,GLO(misc_encoding));
46     pushSTACK(TheSubr(subr_self)->name);
47     check_value(error_condition,"~S (~S): ~S");
48     pattern = value1;
49     goto restart_regcomp;
50   }
51   pushSTACK(allocate_fpointer((FOREIGN)re));
52   pushSTACK(STACK_0);pushSTACK(``REGEXP::REGEXP-FREE``);funcall(L(finalize),2);
53   VALUES1(popSTACK());          /* foreign pointer */
54 }
55 
56 DEFUN(REGEXP::REGEXP-FREE, compiled)
57 { /* release the contents and the data of the compiled pattern */
58   object fp = popSTACK();
59   if (fpointerp(fp) && fp_validp(TheFpointer(fp))) {
60     regex_t *re = (regex_t*)TheFpointer(fp)->fp_pointer;
61     if (re) {
62       regfree(re); free(re);
63       TheFpointer(fp)->fp_pointer = NULL;
64       mark_fp_invalid(TheFpointer(fp));
65       VALUES1(T);
66     } else VALUES1(NIL);
67   } else VALUES1(NIL);
68 }
69 
70 typedef enum { ret_values, ret_list, ret_vector, ret_bool } rettype_t;
71 #define CHECK_RETTYPE(x)                                              \
72   (eq(x,S(list)) ? ret_list                                           \
73    : eq(x,S(vector)) ? ret_vector                                     \
74    : eq(x,S(boolean)) ? ret_bool                                      \
75    : ret_values)
76 
DEFFLAGSET(regexp_exec_flags,REG_NOTBOL REG_NOTEOL)77 DEFFLAGSET(regexp_exec_flags, REG_NOTBOL REG_NOTEOL)
78 DEFUN(REGEXP::REGEXP-EXEC,pattern string &key           \
79       RETURN-TYPE :START :END NOTBOL NOTEOL)
80 { /* match the compiled pattern against the string */
81   int eflags = regexp_exec_flags();
82   object string = (STACK_3 = check_string(STACK_3));
83   unsigned int length = vector_length(string);
84   unsigned int start = check_uint_defaulted(STACK_1,0);
85   unsigned int end = check_uint_defaulted(STACK_0,length);
86   int status;
87   rettype_t rettype = CHECK_RETTYPE(STACK_2);
88   regex_t *re;
89   regmatch_t *ret;
90   size_t ret_buffer_size;
91   skipSTACK(3);                 /* drop all options */
92   for (;;) {
93     STACK_1 = check_fpointer(STACK_1,true);
94     re = (regex_t*)TheFpointer(STACK_1)->fp_pointer;
95     if (re != NULL) break;
96     pushSTACK(NIL);             /* no PLACE */
97     pushSTACK(STACK_(1+1)); pushSTACK(TheSubr(subr_self)->name);
98     check_value(error_condition,GETTEXT("~S: NULL pattern ~S"));
99     STACK_1 = value1;
100   }
101   string = STACK_0;
102   if (end != length || start != 0) {
103     pushSTACK(sfixnum((int)(end-start)));
104     pushSTACK(S(Kelement_type)); pushSTACK(S(character));
105     pushSTACK(S(Kdisplaced_to)); pushSTACK(string);
106     pushSTACK(S(Kdisplaced_index_offset)); pushSTACK(posfixnum(start));
107     funcall(L(make_array),7);
108     string = value1;
109   }
110   ret_buffer_size = (re->re_nsub+1)*sizeof(regmatch_t);
111   if (ret_buffer_size <= BUFSIZ) {
112     begin_system_call();
113     ret = (regmatch_t*)alloca(ret_buffer_size);
114     end_system_call();
115     if (ret == NULL) OS_error();
116   } else {
117     /* Don't use alloca for sizes > BUFSIZ, it's not safe! */
118     ret = (regmatch_t*)clisp_malloc(ret_buffer_size);
119   }
120   with_string_0(string,Symbol_value(S(utf_8)),stringz, {
121     begin_system_call();
122     status = regexec(re,stringz,re->re_nsub+1,ret,eflags);
123     end_system_call();
124     if (status) {
125       switch (rettype) {
126         case ret_values: VALUES0; break;        /* VALUES => no values */
127         case ret_list:   VALUES1(NIL); break;   /* LIST => () */
128         case ret_vector: VALUES1(`#()`); break; /* VECTOR => #() */
129         case ret_bool:   VALUES1(NIL); break;   /* BOOLEAN => NIL */
130       }
131     } else {
132       uintL re_count;
133       if (rettype != ret_bool) {
134         for (re_count = 0; re_count <= re->re_nsub; re_count++)
135           if (ret[re_count].rm_so >= 0 && ret[re_count].rm_eo >= 0) {
136             pushSTACK(posfixnum(start
137                                 +Encoding_mblen(Symbol_value(S(utf_8)))(Symbol_value(S(utf_8)),
138                                                                         (const uintB*)stringz,
139                                                                         (const uintB*)stringz+ret[re_count].rm_so)));
140             pushSTACK(posfixnum(start
141                                 +Encoding_mblen(Symbol_value(S(utf_8)))(Symbol_value(S(utf_8)),
142                                                                         (const uintB*)stringz,
143                                                                         (const uintB*)stringz+ret[re_count].rm_eo)));
144             funcall(`REGEXP::MAKE-MATCH-BOA`,2); pushSTACK(value1);
145           } else pushSTACK(NIL);
146       }
147       switch (rettype) {
148         case ret_values:
149           if (re_count < fixnum_to_V(Symbol_value(S(multiple_values_limit)))) {
150             STACK_to_mv(re_count);
151             break;
152           } /* else FALLTHROUGH */
153         case ret_list:   VALUES1(listof(re_count)); break;
154         case ret_vector: VALUES1(vectorof(re_count)); break;
155         case ret_bool:   VALUES1(T); break;
156       }
157     }
158   });
159   if (ret_buffer_size > BUFSIZ) {
160     /* buffer allocated using malloc, needs to be free'd */
161     begin_system_call();
162     free(ret);
163     end_system_call();
164   }
165   skipSTACK(2);                 /* drop pattern & string */
166 }
167