1 /*-------------------------------------------------------------------------
2  *
3  * tsquery_rewrite.c
4  *	  Utilities for reconstructing tsquery
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *	  src/backend/utils/adt/tsquery_rewrite.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "catalog/pg_type.h"
18 #include "executor/spi.h"
19 #include "miscadmin.h"
20 #include "tsearch/ts_utils.h"
21 #include "utils/builtins.h"
22 
23 
24 /*
25  * If "node" is equal to "ex", return a copy of "subs" instead.
26  * If "ex" matches a subset of node's children, return a modified version
27  * of "node" in which those children are replaced with a copy of "subs".
28  * Otherwise return "node" unmodified.
29  *
30  * The QTN_NOCHANGE bit is set in successfully modified nodes, so that
31  * we won't uselessly recurse into them.
32  * Also, set *isfind true if we make a replacement.
33  */
34 static QTNode *
findeq(QTNode * node,QTNode * ex,QTNode * subs,bool * isfind)35 findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
36 {
37 	/* Can't match unless signature matches and node type matches. */
38 	if ((node->sign & ex->sign) != ex->sign ||
39 		node->valnode->type != ex->valnode->type)
40 		return node;
41 
42 	/* Ignore nodes marked NOCHANGE, too. */
43 	if (node->flags & QTN_NOCHANGE)
44 		return node;
45 
46 	if (node->valnode->type == QI_OPR)
47 	{
48 		/* Must be same operator. */
49 		if (node->valnode->qoperator.oper != ex->valnode->qoperator.oper)
50 			return node;
51 
52 		if (node->nchild == ex->nchild)
53 		{
54 			/*
55 			 * Simple case: when same number of children, match if equal.
56 			 * (This is reliable when the children were sorted earlier.)
57 			 */
58 			if (QTNEq(node, ex))
59 			{
60 				/* Match; delete node and return a copy of subs instead. */
61 				QTNFree(node);
62 				if (subs)
63 				{
64 					node = QTNCopy(subs);
65 					node->flags |= QTN_NOCHANGE;
66 				}
67 				else
68 					node = NULL;
69 				*isfind = true;
70 			}
71 		}
72 		else if (node->nchild > ex->nchild && ex->nchild > 0)
73 		{
74 			/*
75 			 * AND and OR are commutative/associative, so we should check if a
76 			 * subset of the children match.  For example, if node is A|B|C,
77 			 * and ex is B|C, we have a match after we notionally convert node
78 			 * to A|(B|C).  This does not work for NOT or PHRASE nodes, but we
79 			 * can't get here for those node types because they have a fixed
80 			 * number of children.
81 			 *
82 			 * Because we expect that the children are sorted, it suffices to
83 			 * make one pass through the two lists to find the matches.
84 			 */
85 			bool	   *matched;
86 			int			nmatched;
87 			int			i,
88 						j;
89 
90 			/* Assert that the subset rule is OK */
91 			Assert(node->valnode->qoperator.oper == OP_AND ||
92 				   node->valnode->qoperator.oper == OP_OR);
93 
94 			/* matched[] will record which children of node matched */
95 			matched = (bool *) palloc0(node->nchild * sizeof(bool));
96 			nmatched = 0;
97 			i = j = 0;
98 			while (i < node->nchild && j < ex->nchild)
99 			{
100 				int			cmp = QTNodeCompare(node->child[i], ex->child[j]);
101 
102 				if (cmp == 0)
103 				{
104 					/* match! */
105 					matched[i] = true;
106 					nmatched++;
107 					i++, j++;
108 				}
109 				else if (cmp < 0)
110 				{
111 					/* node->child[i] has no match, ignore it */
112 					i++;
113 				}
114 				else
115 				{
116 					/* ex->child[j] has no match; we can give up immediately */
117 					break;
118 				}
119 			}
120 
121 			if (nmatched == ex->nchild)
122 			{
123 				/* collapse out the matched children of node */
124 				j = 0;
125 				for (i = 0; i < node->nchild; i++)
126 				{
127 					if (matched[i])
128 						QTNFree(node->child[i]);
129 					else
130 						node->child[j++] = node->child[i];
131 				}
132 
133 				/* and instead insert a copy of subs */
134 				if (subs)
135 				{
136 					subs = QTNCopy(subs);
137 					subs->flags |= QTN_NOCHANGE;
138 					node->child[j++] = subs;
139 				}
140 
141 				node->nchild = j;
142 
143 				/*
144 				 * At this point we might have a node with zero or one child,
145 				 * which should be simplified.  But we leave it to our caller
146 				 * (dofindsubquery) to take care of that.
147 				 */
148 
149 				/*
150 				 * Re-sort the node to put new child in the right place.  This
151 				 * is a bit bogus, because it won't matter for findsubquery's
152 				 * remaining processing, and it's insufficient to prepare the
153 				 * tree for another search (we would need to re-flatten as
154 				 * well, and we don't want to do that because we'd lose the
155 				 * QTN_NOCHANGE marking on the new child).  But it's needed to
156 				 * keep the results the same as the regression tests expect.
157 				 */
158 				QTNSort(node);
159 
160 				*isfind = true;
161 			}
162 
163 			pfree(matched);
164 		}
165 	}
166 	else
167 	{
168 		Assert(node->valnode->type == QI_VAL);
169 
170 		if (node->valnode->qoperand.valcrc != ex->valnode->qoperand.valcrc)
171 			return node;
172 		else if (QTNEq(node, ex))
173 		{
174 			QTNFree(node);
175 			if (subs)
176 			{
177 				node = QTNCopy(subs);
178 				node->flags |= QTN_NOCHANGE;
179 			}
180 			else
181 			{
182 				node = NULL;
183 			}
184 			*isfind = true;
185 		}
186 	}
187 
188 	return node;
189 }
190 
191 /*
192  * Recursive guts of findsubquery(): attempt to replace "ex" with "subs"
193  * at the root node, and if we failed to do so, recursively match against
194  * child nodes.
195  *
196  * Delete any void subtrees resulting from the replacement.
197  * In the following example '5' is replaced by empty operand:
198  *
199  *	  AND		->	  6
200  *	 /	 \
201  *	5	 OR
202  *		/  \
203  *	   6	5
204  */
205 static QTNode *
dofindsubquery(QTNode * root,QTNode * ex,QTNode * subs,bool * isfind)206 dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
207 {
208 	/* since this function recurses, it could be driven to stack overflow. */
209 	check_stack_depth();
210 
211 	/* also, since it's a bit expensive, let's check for query cancel. */
212 	CHECK_FOR_INTERRUPTS();
213 
214 	/* match at the node itself */
215 	root = findeq(root, ex, subs, isfind);
216 
217 	/* unless we matched here, consider matches at child nodes */
218 	if (root && (root->flags & QTN_NOCHANGE) == 0 &&
219 		root->valnode->type == QI_OPR)
220 	{
221 		int			i,
222 					j = 0;
223 
224 		/*
225 		 * Any subtrees that are replaced by NULL must be dropped from the
226 		 * tree.
227 		 */
228 		for (i = 0; i < root->nchild; i++)
229 		{
230 			root->child[j] = dofindsubquery(root->child[i], ex, subs, isfind);
231 			if (root->child[j])
232 				j++;
233 		}
234 
235 		root->nchild = j;
236 
237 		/*
238 		 * If we have just zero or one remaining child node, simplify out this
239 		 * operator node.
240 		 */
241 		if (root->nchild == 0)
242 		{
243 			QTNFree(root);
244 			root = NULL;
245 		}
246 		else if (root->nchild == 1 && root->valnode->qoperator.oper != OP_NOT)
247 		{
248 			QTNode	   *nroot = root->child[0];
249 
250 			pfree(root);
251 			root = nroot;
252 		}
253 	}
254 
255 	return root;
256 }
257 
258 /*
259  * Substitute "subs" for "ex" throughout the QTNode tree at root.
260  *
261  * If isfind isn't NULL, set *isfind to show whether we made any substitution.
262  *
263  * Both "root" and "ex" must have been through QTNTernary and QTNSort
264  * to ensure reliable matching.
265  */
266 QTNode *
findsubquery(QTNode * root,QTNode * ex,QTNode * subs,bool * isfind)267 findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
268 {
269 	bool		DidFind = false;
270 
271 	root = dofindsubquery(root, ex, subs, &DidFind);
272 
273 	if (isfind)
274 		*isfind = DidFind;
275 
276 	return root;
277 }
278 
279 Datum
tsquery_rewrite_query(PG_FUNCTION_ARGS)280 tsquery_rewrite_query(PG_FUNCTION_ARGS)
281 {
282 	TSQuery		query = PG_GETARG_TSQUERY_COPY(0);
283 	text	   *in = PG_GETARG_TEXT_PP(1);
284 	TSQuery		rewrited = query;
285 	MemoryContext outercontext = CurrentMemoryContext;
286 	MemoryContext oldcontext;
287 	QTNode	   *tree;
288 	char	   *buf;
289 	SPIPlanPtr	plan;
290 	Portal		portal;
291 	bool		isnull;
292 
293 	if (query->size == 0)
294 	{
295 		PG_FREE_IF_COPY(in, 1);
296 		PG_RETURN_POINTER(rewrited);
297 	}
298 
299 	tree = QT2QTN(GETQUERY(query), GETOPERAND(query));
300 	QTNTernary(tree);
301 	QTNSort(tree);
302 
303 	buf = text_to_cstring(in);
304 
305 	SPI_connect();
306 
307 	if ((plan = SPI_prepare(buf, 0, NULL)) == NULL)
308 		elog(ERROR, "SPI_prepare(\"%s\") failed", buf);
309 
310 	if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
311 		elog(ERROR, "SPI_cursor_open(\"%s\") failed", buf);
312 
313 	SPI_cursor_fetch(portal, true, 100);
314 
315 	if (SPI_tuptable == NULL ||
316 		SPI_tuptable->tupdesc->natts != 2 ||
317 		SPI_gettypeid(SPI_tuptable->tupdesc, 1) != TSQUERYOID ||
318 		SPI_gettypeid(SPI_tuptable->tupdesc, 2) != TSQUERYOID)
319 		ereport(ERROR,
320 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
321 				 errmsg("ts_rewrite query must return two tsquery columns")));
322 
323 	while (SPI_processed > 0 && tree)
324 	{
325 		uint64		i;
326 
327 		for (i = 0; i < SPI_processed && tree; i++)
328 		{
329 			Datum		qdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
330 			Datum		sdata;
331 
332 			if (isnull)
333 				continue;
334 
335 			sdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull);
336 
337 			if (!isnull)
338 			{
339 				TSQuery		qtex = DatumGetTSQuery(qdata);
340 				TSQuery		qtsubs = DatumGetTSQuery(sdata);
341 				QTNode	   *qex,
342 						   *qsubs = NULL;
343 
344 				if (qtex->size == 0)
345 				{
346 					if (qtex != (TSQuery) DatumGetPointer(qdata))
347 						pfree(qtex);
348 					if (qtsubs != (TSQuery) DatumGetPointer(sdata))
349 						pfree(qtsubs);
350 					continue;
351 				}
352 
353 				qex = QT2QTN(GETQUERY(qtex), GETOPERAND(qtex));
354 
355 				QTNTernary(qex);
356 				QTNSort(qex);
357 
358 				if (qtsubs->size)
359 					qsubs = QT2QTN(GETQUERY(qtsubs), GETOPERAND(qtsubs));
360 
361 				oldcontext = MemoryContextSwitchTo(outercontext);
362 				tree = findsubquery(tree, qex, qsubs, NULL);
363 				MemoryContextSwitchTo(oldcontext);
364 
365 				QTNFree(qex);
366 				if (qtex != (TSQuery) DatumGetPointer(qdata))
367 					pfree(qtex);
368 				QTNFree(qsubs);
369 				if (qtsubs != (TSQuery) DatumGetPointer(sdata))
370 					pfree(qtsubs);
371 
372 				if (tree)
373 				{
374 					/* ready the tree for another pass */
375 					QTNClearFlags(tree, QTN_NOCHANGE);
376 					QTNTernary(tree);
377 					QTNSort(tree);
378 				}
379 			}
380 		}
381 
382 		SPI_freetuptable(SPI_tuptable);
383 		SPI_cursor_fetch(portal, true, 100);
384 	}
385 
386 	SPI_freetuptable(SPI_tuptable);
387 	SPI_cursor_close(portal);
388 	SPI_freeplan(plan);
389 	SPI_finish();
390 
391 	if (tree)
392 	{
393 		QTNBinary(tree);
394 		rewrited = QTN2QT(tree);
395 		QTNFree(tree);
396 		PG_FREE_IF_COPY(query, 0);
397 	}
398 	else
399 	{
400 		SET_VARSIZE(rewrited, HDRSIZETQ);
401 		rewrited->size = 0;
402 	}
403 
404 	pfree(buf);
405 	PG_FREE_IF_COPY(in, 1);
406 	PG_RETURN_POINTER(rewrited);
407 }
408 
409 Datum
tsquery_rewrite(PG_FUNCTION_ARGS)410 tsquery_rewrite(PG_FUNCTION_ARGS)
411 {
412 	TSQuery		query = PG_GETARG_TSQUERY_COPY(0);
413 	TSQuery		ex = PG_GETARG_TSQUERY(1);
414 	TSQuery		subst = PG_GETARG_TSQUERY(2);
415 	TSQuery		rewrited = query;
416 	QTNode	   *tree,
417 			   *qex,
418 			   *subs = NULL;
419 
420 	if (query->size == 0 || ex->size == 0)
421 	{
422 		PG_FREE_IF_COPY(ex, 1);
423 		PG_FREE_IF_COPY(subst, 2);
424 		PG_RETURN_POINTER(rewrited);
425 	}
426 
427 	tree = QT2QTN(GETQUERY(query), GETOPERAND(query));
428 	QTNTernary(tree);
429 	QTNSort(tree);
430 
431 	qex = QT2QTN(GETQUERY(ex), GETOPERAND(ex));
432 	QTNTernary(qex);
433 	QTNSort(qex);
434 
435 	if (subst->size)
436 		subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst));
437 
438 	tree = findsubquery(tree, qex, subs, NULL);
439 
440 	QTNFree(qex);
441 	QTNFree(subs);
442 
443 	if (!tree)
444 	{
445 		SET_VARSIZE(rewrited, HDRSIZETQ);
446 		rewrited->size = 0;
447 		PG_FREE_IF_COPY(ex, 1);
448 		PG_FREE_IF_COPY(subst, 2);
449 		PG_RETURN_POINTER(rewrited);
450 	}
451 	else
452 	{
453 		QTNBinary(tree);
454 		rewrited = QTN2QT(tree);
455 		QTNFree(tree);
456 	}
457 
458 	PG_FREE_IF_COPY(query, 0);
459 	PG_FREE_IF_COPY(ex, 1);
460 	PG_FREE_IF_COPY(subst, 2);
461 	PG_RETURN_POINTER(rewrited);
462 }
463