1 /*-------------------------------------------------------------------------
2  *
3  * regis.c
4  *		Fast regex subset
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *	  src/backend/tsearch/regis.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "tsearch/dicts/regis.h"
18 #include "tsearch/ts_locale.h"
19 
20 #define RS_IN_ONEOF 1
21 #define RS_IN_ONEOF_IN	2
22 #define RS_IN_NONEOF	3
23 #define RS_IN_WAIT	4
24 
25 
26 /*
27  * Test whether a regex is of the subset supported here.
28  * Keep this in sync with RS_compile!
29  */
30 bool
RS_isRegis(const char * str)31 RS_isRegis(const char *str)
32 {
33 	int			state = RS_IN_WAIT;
34 	const char *c = str;
35 
36 	while (*c)
37 	{
38 		if (state == RS_IN_WAIT)
39 		{
40 			if (t_isalpha(c))
41 				 /* okay */ ;
42 			else if (t_iseq(c, '['))
43 				state = RS_IN_ONEOF;
44 			else
45 				return false;
46 		}
47 		else if (state == RS_IN_ONEOF)
48 		{
49 			if (t_iseq(c, '^'))
50 				state = RS_IN_NONEOF;
51 			else if (t_isalpha(c))
52 				state = RS_IN_ONEOF_IN;
53 			else
54 				return false;
55 		}
56 		else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
57 		{
58 			if (t_isalpha(c))
59 				 /* okay */ ;
60 			else if (t_iseq(c, ']'))
61 				state = RS_IN_WAIT;
62 			else
63 				return false;
64 		}
65 		else
66 			elog(ERROR, "internal error in RS_isRegis: state %d", state);
67 		c += pg_mblen(c);
68 	}
69 
70 	return (state == RS_IN_WAIT);
71 }
72 
73 static RegisNode *
newRegisNode(RegisNode * prev,int len)74 newRegisNode(RegisNode *prev, int len)
75 {
76 	RegisNode  *ptr;
77 
78 	ptr = (RegisNode *) palloc0(RNHDRSZ + len + 1);
79 	if (prev)
80 		prev->next = ptr;
81 	return ptr;
82 }
83 
84 void
RS_compile(Regis * r,bool issuffix,const char * str)85 RS_compile(Regis *r, bool issuffix, const char *str)
86 {
87 	int			len = strlen(str);
88 	int			state = RS_IN_WAIT;
89 	const char *c = str;
90 	RegisNode  *ptr = NULL;
91 
92 	memset(r, 0, sizeof(Regis));
93 	r->issuffix = (issuffix) ? 1 : 0;
94 
95 	while (*c)
96 	{
97 		if (state == RS_IN_WAIT)
98 		{
99 			if (t_isalpha(c))
100 			{
101 				if (ptr)
102 					ptr = newRegisNode(ptr, len);
103 				else
104 					ptr = r->node = newRegisNode(NULL, len);
105 				COPYCHAR(ptr->data, c);
106 				ptr->type = RSF_ONEOF;
107 				ptr->len = pg_mblen(c);
108 			}
109 			else if (t_iseq(c, '['))
110 			{
111 				if (ptr)
112 					ptr = newRegisNode(ptr, len);
113 				else
114 					ptr = r->node = newRegisNode(NULL, len);
115 				ptr->type = RSF_ONEOF;
116 				state = RS_IN_ONEOF;
117 			}
118 			else				/* shouldn't get here */
119 				elog(ERROR, "invalid regis pattern: \"%s\"", str);
120 		}
121 		else if (state == RS_IN_ONEOF)
122 		{
123 			if (t_iseq(c, '^'))
124 			{
125 				ptr->type = RSF_NONEOF;
126 				state = RS_IN_NONEOF;
127 			}
128 			else if (t_isalpha(c))
129 			{
130 				COPYCHAR(ptr->data, c);
131 				ptr->len = pg_mblen(c);
132 				state = RS_IN_ONEOF_IN;
133 			}
134 			else				/* shouldn't get here */
135 				elog(ERROR, "invalid regis pattern: \"%s\"", str);
136 		}
137 		else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
138 		{
139 			if (t_isalpha(c))
140 			{
141 				COPYCHAR(ptr->data + ptr->len, c);
142 				ptr->len += pg_mblen(c);
143 			}
144 			else if (t_iseq(c, ']'))
145 				state = RS_IN_WAIT;
146 			else				/* shouldn't get here */
147 				elog(ERROR, "invalid regis pattern: \"%s\"", str);
148 		}
149 		else
150 			elog(ERROR, "internal error in RS_compile: state %d", state);
151 		c += pg_mblen(c);
152 	}
153 
154 	if (state != RS_IN_WAIT)	/* shouldn't get here */
155 		elog(ERROR, "invalid regis pattern: \"%s\"", str);
156 
157 	ptr = r->node;
158 	while (ptr)
159 	{
160 		r->nchar++;
161 		ptr = ptr->next;
162 	}
163 }
164 
165 void
RS_free(Regis * r)166 RS_free(Regis *r)
167 {
168 	RegisNode  *ptr = r->node,
169 			   *tmp;
170 
171 	while (ptr)
172 	{
173 		tmp = ptr->next;
174 		pfree(ptr);
175 		ptr = tmp;
176 	}
177 
178 	r->node = NULL;
179 }
180 
181 #ifdef USE_WIDE_UPPER_LOWER
182 static bool
mb_strchr(char * str,char * c)183 mb_strchr(char *str, char *c)
184 {
185 	int			clen,
186 				plen,
187 				i;
188 	char	   *ptr = str;
189 	bool		res = false;
190 
191 	clen = pg_mblen(c);
192 	while (*ptr && !res)
193 	{
194 		plen = pg_mblen(ptr);
195 		if (plen == clen)
196 		{
197 			i = plen;
198 			res = true;
199 			while (i--)
200 				if (*(ptr + i) != *(c + i))
201 				{
202 					res = false;
203 					break;
204 				}
205 		}
206 
207 		ptr += plen;
208 	}
209 
210 	return res;
211 }
212 #else
213 #define mb_strchr(s,c)	( (strchr((s),*(c)) == NULL) ? false : true )
214 #endif
215 
216 
217 bool
RS_execute(Regis * r,char * str)218 RS_execute(Regis *r, char *str)
219 {
220 	RegisNode  *ptr = r->node;
221 	char	   *c = str;
222 	int			len = 0;
223 
224 	while (*c)
225 	{
226 		len++;
227 		c += pg_mblen(c);
228 	}
229 
230 	if (len < r->nchar)
231 		return 0;
232 
233 	c = str;
234 	if (r->issuffix)
235 	{
236 		len -= r->nchar;
237 		while (len-- > 0)
238 			c += pg_mblen(c);
239 	}
240 
241 
242 	while (ptr)
243 	{
244 		switch (ptr->type)
245 		{
246 			case RSF_ONEOF:
247 				if (!mb_strchr((char *) ptr->data, c))
248 					return false;
249 				break;
250 			case RSF_NONEOF:
251 				if (mb_strchr((char *) ptr->data, c))
252 					return false;
253 				break;
254 			default:
255 				elog(ERROR, "unrecognized regis node type: %d", ptr->type);
256 		}
257 		ptr = ptr->next;
258 		c += pg_mblen(c);
259 	}
260 
261 	return true;
262 }
263