1 /*-------------------------------------------------------------------------
2  *
3  * regis.c
4  *		Fast regex subset
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *	  src/backend/tsearch/regis.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "tsearch/dicts/regis.h"
18 #include "tsearch/ts_locale.h"
19 
20 #define RS_IN_ONEOF 1
21 #define RS_IN_ONEOF_IN	2
22 #define RS_IN_NONEOF	3
23 #define RS_IN_WAIT	4
24 
25 
26 /*
27  * Test whether a regex is of the subset supported here.
28  * Keep this in sync with RS_compile!
29  */
30 bool
RS_isRegis(const char * str)31 RS_isRegis(const char *str)
32 {
33 	int			state = RS_IN_WAIT;
34 	const char *c = str;
35 
36 	while (*c)
37 	{
38 		if (state == RS_IN_WAIT)
39 		{
40 			if (t_isalpha(c))
41 				 /* okay */ ;
42 			else if (t_iseq(c, '['))
43 				state = RS_IN_ONEOF;
44 			else
45 				return false;
46 		}
47 		else if (state == RS_IN_ONEOF)
48 		{
49 			if (t_iseq(c, '^'))
50 				state = RS_IN_NONEOF;
51 			else if (t_isalpha(c))
52 				state = RS_IN_ONEOF_IN;
53 			else
54 				return false;
55 		}
56 		else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
57 		{
58 			if (t_isalpha(c))
59 				 /* okay */ ;
60 			else if (t_iseq(c, ']'))
61 				state = RS_IN_WAIT;
62 			else
63 				return false;
64 		}
65 		else
66 			elog(ERROR, "internal error in RS_isRegis: state %d", state);
67 		c += pg_mblen(c);
68 	}
69 
70 	return (state == RS_IN_WAIT);
71 }
72 
73 static RegisNode *
newRegisNode(RegisNode * prev,int len)74 newRegisNode(RegisNode *prev, int len)
75 {
76 	RegisNode  *ptr;
77 
78 	ptr = (RegisNode *) palloc0(RNHDRSZ + len + 1);
79 	if (prev)
80 		prev->next = ptr;
81 	return ptr;
82 }
83 
84 void
RS_compile(Regis * r,bool issuffix,const char * str)85 RS_compile(Regis *r, bool issuffix, const char *str)
86 {
87 	int			len = strlen(str);
88 	int			state = RS_IN_WAIT;
89 	const char *c = str;
90 	RegisNode  *ptr = NULL;
91 
92 	memset(r, 0, sizeof(Regis));
93 	r->issuffix = (issuffix) ? 1 : 0;
94 
95 	while (*c)
96 	{
97 		if (state == RS_IN_WAIT)
98 		{
99 			if (t_isalpha(c))
100 			{
101 				if (ptr)
102 					ptr = newRegisNode(ptr, len);
103 				else
104 					ptr = r->node = newRegisNode(NULL, len);
105 				COPYCHAR(ptr->data, c);
106 				ptr->type = RSF_ONEOF;
107 				ptr->len = pg_mblen(c);
108 			}
109 			else if (t_iseq(c, '['))
110 			{
111 				if (ptr)
112 					ptr = newRegisNode(ptr, len);
113 				else
114 					ptr = r->node = newRegisNode(NULL, len);
115 				ptr->type = RSF_ONEOF;
116 				state = RS_IN_ONEOF;
117 			}
118 			else				/* shouldn't get here */
119 				elog(ERROR, "invalid regis pattern: \"%s\"", str);
120 		}
121 		else if (state == RS_IN_ONEOF)
122 		{
123 			if (t_iseq(c, '^'))
124 			{
125 				ptr->type = RSF_NONEOF;
126 				state = RS_IN_NONEOF;
127 			}
128 			else if (t_isalpha(c))
129 			{
130 				COPYCHAR(ptr->data, c);
131 				ptr->len = pg_mblen(c);
132 				state = RS_IN_ONEOF_IN;
133 			}
134 			else				/* shouldn't get here */
135 				elog(ERROR, "invalid regis pattern: \"%s\"", str);
136 		}
137 		else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
138 		{
139 			if (t_isalpha(c))
140 			{
141 				COPYCHAR(ptr->data + ptr->len, c);
142 				ptr->len += pg_mblen(c);
143 			}
144 			else if (t_iseq(c, ']'))
145 				state = RS_IN_WAIT;
146 			else				/* shouldn't get here */
147 				elog(ERROR, "invalid regis pattern: \"%s\"", str);
148 		}
149 		else
150 			elog(ERROR, "internal error in RS_compile: state %d", state);
151 		c += pg_mblen(c);
152 	}
153 
154 	if (state != RS_IN_WAIT)	/* shouldn't get here */
155 		elog(ERROR, "invalid regis pattern: \"%s\"", str);
156 
157 	ptr = r->node;
158 	while (ptr)
159 	{
160 		r->nchar++;
161 		ptr = ptr->next;
162 	}
163 }
164 
165 void
RS_free(Regis * r)166 RS_free(Regis *r)
167 {
168 	RegisNode  *ptr = r->node,
169 			   *tmp;
170 
171 	while (ptr)
172 	{
173 		tmp = ptr->next;
174 		pfree(ptr);
175 		ptr = tmp;
176 	}
177 
178 	r->node = NULL;
179 }
180 
181 static bool
mb_strchr(char * str,char * c)182 mb_strchr(char *str, char *c)
183 {
184 	int			clen,
185 				plen,
186 				i;
187 	char	   *ptr = str;
188 	bool		res = false;
189 
190 	clen = pg_mblen(c);
191 	while (*ptr && !res)
192 	{
193 		plen = pg_mblen(ptr);
194 		if (plen == clen)
195 		{
196 			i = plen;
197 			res = true;
198 			while (i--)
199 				if (*(ptr + i) != *(c + i))
200 				{
201 					res = false;
202 					break;
203 				}
204 		}
205 
206 		ptr += plen;
207 	}
208 
209 	return res;
210 }
211 
212 bool
RS_execute(Regis * r,char * str)213 RS_execute(Regis *r, char *str)
214 {
215 	RegisNode  *ptr = r->node;
216 	char	   *c = str;
217 	int			len = 0;
218 
219 	while (*c)
220 	{
221 		len++;
222 		c += pg_mblen(c);
223 	}
224 
225 	if (len < r->nchar)
226 		return 0;
227 
228 	c = str;
229 	if (r->issuffix)
230 	{
231 		len -= r->nchar;
232 		while (len-- > 0)
233 			c += pg_mblen(c);
234 	}
235 
236 
237 	while (ptr)
238 	{
239 		switch (ptr->type)
240 		{
241 			case RSF_ONEOF:
242 				if (!mb_strchr((char *) ptr->data, c))
243 					return false;
244 				break;
245 			case RSF_NONEOF:
246 				if (mb_strchr((char *) ptr->data, c))
247 					return false;
248 				break;
249 			default:
250 				elog(ERROR, "unrecognized regis node type: %d", ptr->type);
251 		}
252 		ptr = ptr->next;
253 		c += pg_mblen(c);
254 	}
255 
256 	return true;
257 }
258