1 /*
2  * op function for ltree and lquery
3  * Teodor Sigaev <teodor@stack.net>
4  * contrib/ltree/lquery_op.c
5  */
6 #include "postgres.h"
7 
8 #include <ctype.h>
9 
10 #include "catalog/pg_collation.h"
11 #include "ltree.h"
12 #include "miscadmin.h"
13 #include "utils/formatting.h"
14 
15 PG_FUNCTION_INFO_V1(ltq_regex);
16 PG_FUNCTION_INFO_V1(ltq_rregex);
17 
18 PG_FUNCTION_INFO_V1(lt_q_regex);
19 PG_FUNCTION_INFO_V1(lt_q_rregex);
20 
21 #define NEXTVAL(x) ( (lquery*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
22 
23 static char *
getlexeme(char * start,char * end,int * len)24 getlexeme(char *start, char *end, int *len)
25 {
26 	char	   *ptr;
27 	int			charlen;
28 
29 	while (start < end && (charlen = pg_mblen(start)) == 1 && t_iseq(start, '_'))
30 		start += charlen;
31 
32 	ptr = start;
33 	if (ptr >= end)
34 		return NULL;
35 
36 	while (ptr < end && !((charlen = pg_mblen(ptr)) == 1 && t_iseq(ptr, '_')))
37 		ptr += charlen;
38 
39 	*len = ptr - start;
40 	return start;
41 }
42 
43 bool
compare_subnode(ltree_level * t,char * qn,int len,int (* cmpptr)(const char *,const char *,size_t),bool anyend)44 compare_subnode(ltree_level *t, char *qn, int len, int (*cmpptr) (const char *, const char *, size_t), bool anyend)
45 {
46 	char	   *endt = t->name + t->len;
47 	char	   *endq = qn + len;
48 	char	   *tn;
49 	int			lent,
50 				lenq;
51 	bool		isok;
52 
53 	while ((qn = getlexeme(qn, endq, &lenq)) != NULL)
54 	{
55 		tn = t->name;
56 		isok = false;
57 		while ((tn = getlexeme(tn, endt, &lent)) != NULL)
58 		{
59 			if ((lent == lenq || (lent > lenq && anyend)) &&
60 				(*cmpptr) (qn, tn, lenq) == 0)
61 			{
62 
63 				isok = true;
64 				break;
65 			}
66 			tn += lent;
67 		}
68 
69 		if (!isok)
70 			return false;
71 		qn += lenq;
72 	}
73 
74 	return true;
75 }
76 
77 int
ltree_strncasecmp(const char * a,const char * b,size_t s)78 ltree_strncasecmp(const char *a, const char *b, size_t s)
79 {
80 	char	   *al = str_tolower(a, s, DEFAULT_COLLATION_OID);
81 	char	   *bl = str_tolower(b, s, DEFAULT_COLLATION_OID);
82 	int			res;
83 
84 	res = strncmp(al, bl, s);
85 
86 	pfree(al);
87 	pfree(bl);
88 
89 	return res;
90 }
91 
92 /*
93  * See if an lquery_level matches an ltree_level
94  *
95  * This accounts for all flags including LQL_NOT, but does not
96  * consider repetition counts.
97  */
98 static bool
checkLevel(lquery_level * curq,ltree_level * curt)99 checkLevel(lquery_level *curq, ltree_level *curt)
100 {
101 	lquery_variant *curvar = LQL_FIRST(curq);
102 	bool		success;
103 
104 	success = (curq->flag & LQL_NOT) ? false : true;
105 
106 	/* numvar == 0 means '*' which matches anything */
107 	if (curq->numvar == 0)
108 		return success;
109 
110 	for (int i = 0; i < curq->numvar; i++)
111 	{
112 		int			(*cmpptr) (const char *, const char *, size_t);
113 
114 		cmpptr = (curvar->flag & LVAR_INCASE) ? ltree_strncasecmp : strncmp;
115 
116 		if (curvar->flag & LVAR_SUBLEXEME)
117 		{
118 			if (compare_subnode(curt, curvar->name, curvar->len, cmpptr,
119 								(curvar->flag & LVAR_ANYEND)))
120 				return success;
121 		}
122 		else if ((curvar->len == curt->len ||
123 				  (curt->len > curvar->len && (curvar->flag & LVAR_ANYEND))) &&
124 				 (*cmpptr) (curvar->name, curt->name, curvar->len) == 0)
125 			return success;
126 
127 		curvar = LVAR_NEXT(curvar);
128 	}
129 	return !success;
130 }
131 
132 /*
133  * Try to match an lquery (of qlen items) to an ltree (of tlen items)
134  */
135 static bool
checkCond(lquery_level * curq,int qlen,ltree_level * curt,int tlen)136 checkCond(lquery_level *curq, int qlen,
137 		  ltree_level *curt, int tlen)
138 {
139 	/* Since this function recurses, it could be driven to stack overflow */
140 	check_stack_depth();
141 
142 	/* Pathological patterns could take awhile, too */
143 	CHECK_FOR_INTERRUPTS();
144 
145 	/* Loop while we have query items to consider */
146 	while (qlen > 0)
147 	{
148 		int			low,
149 					high;
150 		lquery_level *nextq;
151 
152 		/*
153 		 * Get min and max repetition counts for this query item, dealing with
154 		 * the backwards-compatibility hack that the low/high fields aren't
155 		 * meaningful for non-'*' items unless LQL_COUNT is set.
156 		 */
157 		if ((curq->flag & LQL_COUNT) || curq->numvar == 0)
158 			low = curq->low, high = curq->high;
159 		else
160 			low = high = 1;
161 
162 		/*
163 		 * We may limit "high" to the remaining text length; this avoids
164 		 * separate tests below.
165 		 */
166 		if (high > tlen)
167 			high = tlen;
168 
169 		/* Fail if a match of required number of items is impossible */
170 		if (high < low)
171 			return false;
172 
173 		/*
174 		 * Recursively check the rest of the pattern against each possible
175 		 * start point following some of this item's match(es).
176 		 */
177 		nextq = LQL_NEXT(curq);
178 		qlen--;
179 
180 		for (int matchcnt = 0; matchcnt < high; matchcnt++)
181 		{
182 			/*
183 			 * If we've consumed an acceptable number of matches of this item,
184 			 * and the rest of the pattern matches beginning here, we're good.
185 			 */
186 			if (matchcnt >= low && checkCond(nextq, qlen, curt, tlen))
187 				return true;
188 
189 			/*
190 			 * Otherwise, try to match one more text item to this query item.
191 			 */
192 			if (!checkLevel(curq, curt))
193 				return false;
194 
195 			curt = LEVEL_NEXT(curt);
196 			tlen--;
197 		}
198 
199 		/*
200 		 * Once we've consumed "high" matches, we can succeed only if the rest
201 		 * of the pattern matches beginning here.  Loop around (if you prefer,
202 		 * think of this as tail recursion).
203 		 */
204 		curq = nextq;
205 	}
206 
207 	/*
208 	 * Once we're out of query items, we match only if there's no remaining
209 	 * text either.
210 	 */
211 	return (tlen == 0);
212 }
213 
214 Datum
ltq_regex(PG_FUNCTION_ARGS)215 ltq_regex(PG_FUNCTION_ARGS)
216 {
217 	ltree	   *tree = PG_GETARG_LTREE_P(0);
218 	lquery	   *query = PG_GETARG_LQUERY_P(1);
219 	bool		res;
220 
221 	res = checkCond(LQUERY_FIRST(query), query->numlevel,
222 					LTREE_FIRST(tree), tree->numlevel);
223 
224 	PG_FREE_IF_COPY(tree, 0);
225 	PG_FREE_IF_COPY(query, 1);
226 	PG_RETURN_BOOL(res);
227 }
228 
229 Datum
ltq_rregex(PG_FUNCTION_ARGS)230 ltq_rregex(PG_FUNCTION_ARGS)
231 {
232 	PG_RETURN_DATUM(DirectFunctionCall2(ltq_regex,
233 										PG_GETARG_DATUM(1),
234 										PG_GETARG_DATUM(0)
235 										));
236 }
237 
238 Datum
lt_q_regex(PG_FUNCTION_ARGS)239 lt_q_regex(PG_FUNCTION_ARGS)
240 {
241 	ltree	   *tree = PG_GETARG_LTREE_P(0);
242 	ArrayType  *_query = PG_GETARG_ARRAYTYPE_P(1);
243 	lquery	   *query = (lquery *) ARR_DATA_PTR(_query);
244 	bool		res = false;
245 	int			num = ArrayGetNItems(ARR_NDIM(_query), ARR_DIMS(_query));
246 
247 	if (ARR_NDIM(_query) > 1)
248 		ereport(ERROR,
249 				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
250 				 errmsg("array must be one-dimensional")));
251 	if (array_contains_nulls(_query))
252 		ereport(ERROR,
253 				(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
254 				 errmsg("array must not contain nulls")));
255 
256 	while (num > 0)
257 	{
258 		if (DatumGetBool(DirectFunctionCall2(ltq_regex,
259 											 PointerGetDatum(tree), PointerGetDatum(query))))
260 		{
261 
262 			res = true;
263 			break;
264 		}
265 		num--;
266 		query = NEXTVAL(query);
267 	}
268 
269 	PG_FREE_IF_COPY(tree, 0);
270 	PG_FREE_IF_COPY(_query, 1);
271 	PG_RETURN_BOOL(res);
272 }
273 
274 Datum
lt_q_rregex(PG_FUNCTION_ARGS)275 lt_q_rregex(PG_FUNCTION_ARGS)
276 {
277 	PG_RETURN_DATUM(DirectFunctionCall2(lt_q_regex,
278 										PG_GETARG_DATUM(1),
279 										PG_GETARG_DATUM(0)
280 										));
281 }
282