1 /* Regular expressions via PCRE */
2
3 # include "appUtilConfig.h"
4
5 # include <string.h>
6
7 # include <reg.h>
8 # include <pcre.h>
9
10 # include <appDebugon.h>
11
/*
 *  Build a copy of 'pattern' in which every character that has a
 *  special meaning in a PCRE pattern (outside a character class) is
 *  preceded by a backslash, so that the result matches the original
 *  text literally.
 *
 *  pattern:	NUL-terminated text to escape. Must not be null.
 *
 *  Returns a malloc()ed, NUL-terminated string that the caller must
 *  free(), or a null pointer if the buffer cannot be allocated.
 */
static char * regEscape( const char * pattern )
    {
    /* The characters that get a backslash in front of them. */
    static const char	metaChars[]= "\\^$.[|()?*+{";

    const size_t	len= strlen( pattern );
    const char *	from= pattern;
    char *		escaped;
    char *		to;

    /* Refuse lengths for which 2* len+ 1 would wrap around. */
    if  ( len > ( (size_t)-1- 1 )/ 2 )
	{ return (char *)0; }

    /* Worst case: every input byte is escaped, doubling the length. */
    escaped= malloc( 2* len+ 1 );
    if  ( ! escaped )
	{ return (char *)0; }

    to= escaped;
    while( *from )
	{
	/* *from is never '\0' here, so strchr() cannot hit the	*/
	/* terminator of metaChars.					*/
	if  ( strchr( metaChars, *from ) )
	    { *(to++)= '\\'; }

	*(to++)= *(from++);
	}

    *to= '\0';

    return escaped;
    }
46
regCompile(const char * pattern,int options)47 regProg * regCompile( const char * pattern,
48 int options )
49 {
50 pcre * rval;
51 int error= 0;
52 int erroffset= 0;
53 const char * errormsg= (const char *)0;
54 char * escaped= (char *)0;
55
56 const unsigned char * const tableptr= (const unsigned char *)0;
57 int pcre_opts= PCRE_UTF8;
58
59 {
60 static int checkedOptions= 0;
61
62 if ( ! checkedOptions )
63 {
64 int res;
65 int has;
66
67 has= 0;
68 res= pcre_config( PCRE_CONFIG_UTF8, &has );
69 if ( res || ! has )
70 { appDebug( "PCRE_CONFIG_UTF8 not set!\n" ); }
71
72 has= 0;
73 res= pcre_config( PCRE_CONFIG_UNICODE_PROPERTIES, &has );
74 if ( res || ! has )
75 { appDebug( "PCRE_CONFIG_UNICODE_PROPERTIES not set!\n" ); }
76
77 checkedOptions= 1;
78 }
79 }
80
81 if ( options & REGflagESCAPE_REGEX )
82 {
83 escaped= regEscape( pattern );
84 if ( ! escaped )
85 { SXDEB(pattern,escaped); return (regProg *)0; }
86 pattern= escaped;
87 }
88
89 rval= pcre_compile2( pattern, pcre_opts,
90 &error, &errormsg, &erroffset,
91 tableptr );
92
93 if ( ! rval )
94 { XSSDEB(rval,errormsg,pattern+erroffset); }
95
96 if ( escaped )
97 { free( escaped ); }
98
99 return (void *)rval;
100 }
101
/*
 *  Run the compiled expression over 'string' with pcre_exec(), starting
 *  at byte offset 'fromByte'.
 *
 *  em:		Receives the match offsets in em->emMatches.
 *  prog:	Compiled expression, as returned by regCompile().
 *  string:	Subject text; byteLength is its length in bytes.
 *
 *  Returns 1 if pcre_exec() reports a match, 0 if it reports no match
 *  or an error.
 */
int regFindLeftToRight(	ExpressionMatch *	em,
			const regProg *		prog,
			const char *		string,
			int			fromByte,
			int			byteLength )
    {
    /* Number of ints in the offset vector that is handed to PCRE. */
    const int	vectorSize= 2+ ( 2* REG_MAX_MATCH )+ 1+ REG_MAX_MATCH;
    const int	noOptions= 0;

    const int	rc= pcre_exec( (pcre *)prog, (pcre_extra *)0,
				string, byteLength, fromByte, noOptions,
				em->emMatches, vectorSize );

    return rc >= 0;
    }
128
/*
 *  Search backwards for the compiled expression: try an anchored match
 *  at 'fromByte', then at every earlier character start, until one
 *  matches or the beginning of the string is passed.
 *
 *  em:		Receives the match offsets in em->emMatches.
 *  prog:	Compiled expression, as returned by regCompile().
 *  string:	Subject text; byteLength is its length in bytes.
 *  fromByte:	Byte offset of the first position to try.
 *
 *  Returns 1 if a match was found, 0 if there is none or pcre_exec()
 *  reported an error.
 */
int regFindRightToLeft(	ExpressionMatch *	em,
			const regProg *		prog,
			const char *		string,
			int			fromByte,
			int			byteLength )
    {
    int		res= PCRE_ERROR_NOMATCH;

    /* Anchored: only accept a match that starts exactly at fromByte. */
    int		opts= PCRE_ANCHORED;

    while( fromByte >= 0 )
	{
	/*  UTF-8: a byte of the form 10xxxxxx continues a multi-byte	*/
	/*  character, so a match cannot start there. Look at the	*/
	/*  byte in the string, not at the value of the offset, and	*/
	/*  do not read the position just past the end.		*/
	if  ( fromByte < byteLength					&&
	      ( (unsigned char)string[fromByte] & 0xc0 ) == 0x80	)
	    { fromByte--; continue; }

	res= pcre_exec( (pcre *)prog, (pcre_extra *)0,
		(const char *)string, byteLength, fromByte, opts,
		em->emMatches, 2+(2*REG_MAX_MATCH)+1+REG_MAX_MATCH );

	/* Stop on a match, and also on any error other than 'no match'. */
	if  ( res != PCRE_ERROR_NOMATCH )
	    { break; }

	fromByte--;
	}

    return res >= 0;
    }
157
regGetMatch(int * pFrom,int * pPast,const ExpressionMatch * em,int n)158 int regGetMatch( int * pFrom,
159 int * pPast,
160 const ExpressionMatch * em,
161 int n )
162 {
163 if ( n >= REG_MAX_MATCH )
164 { LLDEB(n,REG_MAX_MATCH); return -1; }
165
166 if ( em->emMatches[0] >= 0 && em->emMatches[2+n+0] >= 0 )
167 {
168 *pFrom= em->emMatches[2+n+0];
169 *pPast= em->emMatches[2+n+1];
170 return 0;
171 }
172
173 return -1;
174 }
175
regGetFullMatch(int * pFrom,int * pPast,const ExpressionMatch * em)176 int regGetFullMatch( int * pFrom,
177 int * pPast,
178 const ExpressionMatch * em )
179 {
180 if ( em->emMatches[0] >= 0 )
181 {
182 *pFrom= em->emMatches[0];
183 *pPast= em->emMatches[1];
184 return 0;
185 }
186
187 return -1;
188 }
189
/*
 *  Release a compiled expression by handing it to pcre_free().
 *
 *  prog:	The compiled expression, as returned by regCompile().
 */
void regFree(	regProg *	prog )
    {
    pcre_free( prog );

    return;
    }
192
193