1 /*
2 * CLISP interface to GNU regex
3 * originally by Bruno Haible 1995-04-14
4 * rewritten by Sam Steingold 2003-08-06
5 */
6
7 #include "clisp.h"
8 #include "config.h"
9 #include <sys/types.h> /* regex.h needs this */
10 #include <stdlib.h> /* declare malloc(), free() */
11 #include <stdio.h> /* BUFSIZ */
12 #include <regex.h>
13
14 #ifndef FOREIGN
15 #error FOREIGN is not defined.
16 #error REGEXP needs a CLISP built with the foreign pointer datatype support.
17 #error Go into the main CLISP makefile and add a -DFOREIGN=void*
18 #error to CFLAGS make variable and rebuild CLISP before coming back here.
19 #endif
20
21 DEFMODULE(regexp,"REGEXP")
22
DEFFLAGSET(regexp_compile_flags,REG_EXTENDED REG_ICASE REG_NEWLINE REG_NOSUB)23 DEFFLAGSET(regexp_compile_flags, REG_EXTENDED REG_ICASE REG_NEWLINE REG_NOSUB)
24 DEFUN(REGEXP::REGEXP-COMPILE, pattern &key EXTENDED IGNORE-CASE NEWLINE NOSUB)
25 { /* compile the pattern into a regular expression */
26 int cflags = regexp_compile_flags();
27 object pattern = check_string(popSTACK());
28 int status;
29 regex_t* re;
30 restart_regcomp:
31 re = (regex_t*)clisp_malloc(sizeof(regex_t));
32 with_string_0(pattern,Symbol_value(S(utf_8)),patternz, {
33 begin_system_call();
34 status = regcomp(re,patternz,cflags);
35 end_system_call();
36 });
37 if (status) {
38 char buf[BUFSIZ];
39 begin_system_call();
40 regerror(status,re,buf,BUFSIZ);
41 free(re);
42 end_system_call();
43 pushSTACK(NIL); /* no PLACE */
44 pushSTACK(NIL); pushSTACK(pattern);
45 STACK_1 = asciz_to_string(buf,GLO(misc_encoding));
46 pushSTACK(TheSubr(subr_self)->name);
47 check_value(error_condition,"~S (~S): ~S");
48 pattern = value1;
49 goto restart_regcomp;
50 }
51 pushSTACK(allocate_fpointer((FOREIGN)re));
52 pushSTACK(STACK_0);pushSTACK(``REGEXP::REGEXP-FREE``);funcall(L(finalize),2);
53 VALUES1(popSTACK()); /* foreign pointer */
54 }
55
56 DEFUN(REGEXP::REGEXP-FREE, compiled)
57 { /* release the contents and the data of the compiled pattern */
58 object fp = popSTACK();
59 if (fpointerp(fp) && fp_validp(TheFpointer(fp))) {
60 regex_t *re = (regex_t*)TheFpointer(fp)->fp_pointer;
61 if (re) {
62 regfree(re); free(re);
63 TheFpointer(fp)->fp_pointer = NULL;
64 mark_fp_invalid(TheFpointer(fp));
65 VALUES1(T);
66 } else VALUES1(NIL);
67 } else VALUES1(NIL);
68 }
69
70 typedef enum { ret_values, ret_list, ret_vector, ret_bool } rettype_t;
71 #define CHECK_RETTYPE(x) \
72 (eq(x,S(list)) ? ret_list \
73 : eq(x,S(vector)) ? ret_vector \
74 : eq(x,S(boolean)) ? ret_bool \
75 : ret_values)
76
DEFFLAGSET(regexp_exec_flags,REG_NOTBOL REG_NOTEOL)77 DEFFLAGSET(regexp_exec_flags, REG_NOTBOL REG_NOTEOL)
78 DEFUN(REGEXP::REGEXP-EXEC,pattern string &key \
79 RETURN-TYPE :START :END NOTBOL NOTEOL)
80 { /* match the compiled pattern against the string */
81 int eflags = regexp_exec_flags();
82 object string = (STACK_3 = check_string(STACK_3));
83 unsigned int length = vector_length(string);
84 unsigned int start = check_uint_defaulted(STACK_1,0);
85 unsigned int end = check_uint_defaulted(STACK_0,length);
86 int status;
87 rettype_t rettype = CHECK_RETTYPE(STACK_2);
88 regex_t *re;
89 regmatch_t *ret;
90 size_t ret_buffer_size;
91 skipSTACK(3); /* drop all options */
92 for (;;) {
93 STACK_1 = check_fpointer(STACK_1,true);
94 re = (regex_t*)TheFpointer(STACK_1)->fp_pointer;
95 if (re != NULL) break;
96 pushSTACK(NIL); /* no PLACE */
97 pushSTACK(STACK_(1+1)); pushSTACK(TheSubr(subr_self)->name);
98 check_value(error_condition,GETTEXT("~S: NULL pattern ~S"));
99 STACK_1 = value1;
100 }
101 string = STACK_0;
102 if (end != length || start != 0) {
103 pushSTACK(sfixnum((int)(end-start)));
104 pushSTACK(S(Kelement_type)); pushSTACK(S(character));
105 pushSTACK(S(Kdisplaced_to)); pushSTACK(string);
106 pushSTACK(S(Kdisplaced_index_offset)); pushSTACK(posfixnum(start));
107 funcall(L(make_array),7);
108 string = value1;
109 }
110 ret_buffer_size = (re->re_nsub+1)*sizeof(regmatch_t);
111 if (ret_buffer_size <= BUFSIZ) {
112 begin_system_call();
113 ret = (regmatch_t*)alloca(ret_buffer_size);
114 end_system_call();
115 if (ret == NULL) OS_error();
116 } else {
117 /* Don't use alloca for sizes > BUFSIZ, it's not safe! */
118 ret = (regmatch_t*)clisp_malloc(ret_buffer_size);
119 }
120 with_string_0(string,Symbol_value(S(utf_8)),stringz, {
121 begin_system_call();
122 status = regexec(re,stringz,re->re_nsub+1,ret,eflags);
123 end_system_call();
124 if (status) {
125 switch (rettype) {
126 case ret_values: VALUES0; break; /* VALUES => no values */
127 case ret_list: VALUES1(NIL); break; /* LIST => () */
128 case ret_vector: VALUES1(`#()`); break; /* VECTOR => #() */
129 case ret_bool: VALUES1(NIL); break; /* BOOLEAN => NIL */
130 }
131 } else {
132 uintL re_count;
133 if (rettype != ret_bool) {
134 for (re_count = 0; re_count <= re->re_nsub; re_count++)
135 if (ret[re_count].rm_so >= 0 && ret[re_count].rm_eo >= 0) {
136 pushSTACK(posfixnum(start
137 +Encoding_mblen(Symbol_value(S(utf_8)))(Symbol_value(S(utf_8)),
138 (const uintB*)stringz,
139 (const uintB*)stringz+ret[re_count].rm_so)));
140 pushSTACK(posfixnum(start
141 +Encoding_mblen(Symbol_value(S(utf_8)))(Symbol_value(S(utf_8)),
142 (const uintB*)stringz,
143 (const uintB*)stringz+ret[re_count].rm_eo)));
144 funcall(`REGEXP::MAKE-MATCH-BOA`,2); pushSTACK(value1);
145 } else pushSTACK(NIL);
146 }
147 switch (rettype) {
148 case ret_values:
149 if (re_count < fixnum_to_V(Symbol_value(S(multiple_values_limit)))) {
150 STACK_to_mv(re_count);
151 break;
152 } /* else FALLTHROUGH */
153 case ret_list: VALUES1(listof(re_count)); break;
154 case ret_vector: VALUES1(vectorof(re_count)); break;
155 case ret_bool: VALUES1(T); break;
156 }
157 }
158 });
159 if (ret_buffer_size > BUFSIZ) {
160 /* buffer allocated using malloc, needs to be free'd */
161 begin_system_call();
162 free(ret);
163 end_system_call();
164 }
165 skipSTACK(2); /* drop pattern & string */
166 }
167