1 /*-------------------------------------------------------------------------
2  *
3  * prepqual.c
4  *	  Routines for preprocessing qualification expressions
5  *
6  *
7  * While the parser will produce flattened (N-argument) AND/OR trees from
8  * simple sequences of AND'ed or OR'ed clauses, there might be an AND clause
9  * directly underneath another AND, or OR underneath OR, if the input was
10  * oddly parenthesized.  Also, rule expansion and subquery flattening could
11  * produce such parsetrees.  The planner wants to flatten all such cases
12  * to ensure consistent optimization behavior.
13  *
14  * Formerly, this module was responsible for doing the initial flattening,
15  * but now we leave it to eval_const_expressions to do that since it has to
16  * make a complete pass over the expression tree anyway.  Instead, we just
17  * have to ensure that our manipulations preserve AND/OR flatness.
18  * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
19  * tree after local transformations that might introduce nested AND/ORs.
20  *
21  *
22  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
23  * Portions Copyright (c) 1994, Regents of the University of California
24  *
25  *
26  * IDENTIFICATION
27  *	  src/backend/optimizer/prep/prepqual.c
28  *
29  *-------------------------------------------------------------------------
30  */
31 
32 #include "postgres.h"
33 
34 #include "nodes/makefuncs.h"
35 #include "nodes/nodeFuncs.h"
36 #include "optimizer/optimizer.h"
37 #include "optimizer/prep.h"
38 #include "utils/lsyscache.h"
39 
40 
41 static List *pull_ands(List *andlist);
42 static List *pull_ors(List *orlist);
43 static Expr *find_duplicate_ors(Expr *qual, bool is_check);
44 static Expr *process_duplicate_ors(List *orlist);
45 
46 
47 /*
48  * negate_clause
49  *	  Negate a Boolean expression.
50  *
51  * Input is a clause to be negated (e.g., the argument of a NOT clause).
52  * Returns a new clause equivalent to the negation of the given clause.
53  *
54  * Although this can be invoked on its own, it's mainly intended as a helper
55  * for eval_const_expressions(), and that context drives several design
56  * decisions.  In particular, if the input is already AND/OR flat, we must
57  * preserve that property.  We also don't bother to recurse in situations
58  * where we can assume that lower-level executions of eval_const_expressions
59  * would already have simplified sub-clauses of the input.
60  *
61  * The difference between this and a simple make_notclause() is that this
62  * tries to get rid of the NOT node by logical simplification.  It's clearly
63  * always a win if the NOT node can be eliminated altogether.  However, our
64  * use of DeMorgan's laws could result in having more NOT nodes rather than
65  * fewer.  We do that unconditionally anyway, because in WHERE clauses it's
66  * important to expose as much top-level AND/OR structure as possible.
67  * Also, eliminating an intermediate NOT may allow us to flatten two levels
68  * of AND or OR together that we couldn't have otherwise.  Finally, one of
69  * the motivations for doing this is to ensure that logically equivalent
70  * expressions will be seen as physically equal(), so we should always apply
71  * the same transformations.
72  */
73 Node *
negate_clause(Node * node)74 negate_clause(Node *node)
75 {
76 	if (node == NULL)			/* should not happen */
77 		elog(ERROR, "can't negate an empty subexpression");
78 	switch (nodeTag(node))
79 	{
80 		case T_Const:
81 			{
82 				Const	   *c = (Const *) node;
83 
84 				/* NOT NULL is still NULL */
85 				if (c->constisnull)
86 					return makeBoolConst(false, true);
87 				/* otherwise pretty easy */
88 				return makeBoolConst(!DatumGetBool(c->constvalue), false);
89 			}
90 			break;
91 		case T_OpExpr:
92 			{
93 				/*
94 				 * Negate operator if possible: (NOT (< A B)) => (>= A B)
95 				 */
96 				OpExpr	   *opexpr = (OpExpr *) node;
97 				Oid			negator = get_negator(opexpr->opno);
98 
99 				if (negator)
100 				{
101 					OpExpr	   *newopexpr = makeNode(OpExpr);
102 
103 					newopexpr->opno = negator;
104 					newopexpr->opfuncid = InvalidOid;
105 					newopexpr->opresulttype = opexpr->opresulttype;
106 					newopexpr->opretset = opexpr->opretset;
107 					newopexpr->opcollid = opexpr->opcollid;
108 					newopexpr->inputcollid = opexpr->inputcollid;
109 					newopexpr->args = opexpr->args;
110 					newopexpr->location = opexpr->location;
111 					return (Node *) newopexpr;
112 				}
113 			}
114 			break;
115 		case T_ScalarArrayOpExpr:
116 			{
117 				/*
118 				 * Negate a ScalarArrayOpExpr if its operator has a negator;
119 				 * for example x = ANY (list) becomes x <> ALL (list)
120 				 */
121 				ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node;
122 				Oid			negator = get_negator(saopexpr->opno);
123 
124 				if (negator)
125 				{
126 					ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
127 
128 					newopexpr->opno = negator;
129 					newopexpr->opfuncid = InvalidOid;
130 					newopexpr->hashfuncid = InvalidOid;
131 					newopexpr->useOr = !saopexpr->useOr;
132 					newopexpr->inputcollid = saopexpr->inputcollid;
133 					newopexpr->args = saopexpr->args;
134 					newopexpr->location = saopexpr->location;
135 					return (Node *) newopexpr;
136 				}
137 			}
138 			break;
139 		case T_BoolExpr:
140 			{
141 				BoolExpr   *expr = (BoolExpr *) node;
142 
143 				switch (expr->boolop)
144 				{
145 						/*--------------------
146 						 * Apply DeMorgan's Laws:
147 						 *		(NOT (AND A B)) => (OR (NOT A) (NOT B))
148 						 *		(NOT (OR A B))	=> (AND (NOT A) (NOT B))
149 						 * i.e., swap AND for OR and negate each subclause.
150 						 *
151 						 * If the input is already AND/OR flat and has no NOT
152 						 * directly above AND or OR, this transformation preserves
153 						 * those properties.  For example, if no direct child of
154 						 * the given AND clause is an AND or a NOT-above-OR, then
155 						 * the recursive calls of negate_clause() can't return any
156 						 * OR clauses.  So we needn't call pull_ors() before
157 						 * building a new OR clause.  Similarly for the OR case.
158 						 *--------------------
159 						 */
160 					case AND_EXPR:
161 						{
162 							List	   *nargs = NIL;
163 							ListCell   *lc;
164 
165 							foreach(lc, expr->args)
166 							{
167 								nargs = lappend(nargs,
168 												negate_clause(lfirst(lc)));
169 							}
170 							return (Node *) make_orclause(nargs);
171 						}
172 						break;
173 					case OR_EXPR:
174 						{
175 							List	   *nargs = NIL;
176 							ListCell   *lc;
177 
178 							foreach(lc, expr->args)
179 							{
180 								nargs = lappend(nargs,
181 												negate_clause(lfirst(lc)));
182 							}
183 							return (Node *) make_andclause(nargs);
184 						}
185 						break;
186 					case NOT_EXPR:
187 
188 						/*
189 						 * NOT underneath NOT: they cancel.  We assume the
190 						 * input is already simplified, so no need to recurse.
191 						 */
192 						return (Node *) linitial(expr->args);
193 					default:
194 						elog(ERROR, "unrecognized boolop: %d",
195 							 (int) expr->boolop);
196 						break;
197 				}
198 			}
199 			break;
200 		case T_NullTest:
201 			{
202 				NullTest   *expr = (NullTest *) node;
203 
204 				/*
205 				 * In the rowtype case, the two flavors of NullTest are *not*
206 				 * logical inverses, so we can't simplify.  But it does work
207 				 * for scalar datatypes.
208 				 */
209 				if (!expr->argisrow)
210 				{
211 					NullTest   *newexpr = makeNode(NullTest);
212 
213 					newexpr->arg = expr->arg;
214 					newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
215 											 IS_NOT_NULL : IS_NULL);
216 					newexpr->argisrow = expr->argisrow;
217 					newexpr->location = expr->location;
218 					return (Node *) newexpr;
219 				}
220 			}
221 			break;
222 		case T_BooleanTest:
223 			{
224 				BooleanTest *expr = (BooleanTest *) node;
225 				BooleanTest *newexpr = makeNode(BooleanTest);
226 
227 				newexpr->arg = expr->arg;
228 				switch (expr->booltesttype)
229 				{
230 					case IS_TRUE:
231 						newexpr->booltesttype = IS_NOT_TRUE;
232 						break;
233 					case IS_NOT_TRUE:
234 						newexpr->booltesttype = IS_TRUE;
235 						break;
236 					case IS_FALSE:
237 						newexpr->booltesttype = IS_NOT_FALSE;
238 						break;
239 					case IS_NOT_FALSE:
240 						newexpr->booltesttype = IS_FALSE;
241 						break;
242 					case IS_UNKNOWN:
243 						newexpr->booltesttype = IS_NOT_UNKNOWN;
244 						break;
245 					case IS_NOT_UNKNOWN:
246 						newexpr->booltesttype = IS_UNKNOWN;
247 						break;
248 					default:
249 						elog(ERROR, "unrecognized booltesttype: %d",
250 							 (int) expr->booltesttype);
251 						break;
252 				}
253 				newexpr->location = expr->location;
254 				return (Node *) newexpr;
255 			}
256 			break;
257 		default:
258 			/* else fall through */
259 			break;
260 	}
261 
262 	/*
263 	 * Otherwise we don't know how to simplify this, so just tack on an
264 	 * explicit NOT node.
265 	 */
266 	return (Node *) make_notclause((Expr *) node);
267 }
268 
269 
270 /*
271  * canonicalize_qual
272  *	  Convert a qualification expression to the most useful form.
273  *
274  * This is primarily intended to be used on top-level WHERE (or JOIN/ON)
275  * clauses.  It can also be used on top-level CHECK constraints, for which
276  * pass is_check = true.  DO NOT call it on any expression that is not known
277  * to be one or the other, as it might apply inappropriate simplifications.
278  *
279  * The name of this routine is a holdover from a time when it would try to
280  * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
281  * Eventually, we recognized that that had more theoretical purity than
282  * actual usefulness, and so now the transformation doesn't involve any
283  * notion of reaching a canonical form.
284  *
285  * NOTE: we assume the input has already been through eval_const_expressions
286  * and therefore possesses AND/OR flatness.  Formerly this function included
287  * its own flattening logic, but that requires a useless extra pass over the
288  * tree.
289  *
290  * Returns the modified qualification.
291  */
292 Expr *
canonicalize_qual(Expr * qual,bool is_check)293 canonicalize_qual(Expr *qual, bool is_check)
294 {
295 	Expr	   *newqual;
296 
297 	/* Quick exit for empty qual */
298 	if (qual == NULL)
299 		return NULL;
300 
301 	/* This should not be invoked on quals in implicit-AND format */
302 	Assert(!IsA(qual, List));
303 
304 	/*
305 	 * Pull up redundant subclauses in OR-of-AND trees.  We do this only
306 	 * within the top-level AND/OR structure; there's no point in looking
307 	 * deeper.  Also remove any NULL constants in the top-level structure.
308 	 */
309 	newqual = find_duplicate_ors(qual, is_check);
310 
311 	return newqual;
312 }
313 
314 
315 /*
316  * pull_ands
317  *	  Recursively flatten nested AND clauses into a single and-clause list.
318  *
319  * Input is the arglist of an AND clause.
320  * Returns the rebuilt arglist (note original list structure is not touched).
321  */
322 static List *
pull_ands(List * andlist)323 pull_ands(List *andlist)
324 {
325 	List	   *out_list = NIL;
326 	ListCell   *arg;
327 
328 	foreach(arg, andlist)
329 	{
330 		Node	   *subexpr = (Node *) lfirst(arg);
331 
332 		if (is_andclause(subexpr))
333 			out_list = list_concat(out_list,
334 								   pull_ands(((BoolExpr *) subexpr)->args));
335 		else
336 			out_list = lappend(out_list, subexpr);
337 	}
338 	return out_list;
339 }
340 
341 /*
342  * pull_ors
343  *	  Recursively flatten nested OR clauses into a single or-clause list.
344  *
345  * Input is the arglist of an OR clause.
346  * Returns the rebuilt arglist (note original list structure is not touched).
347  */
348 static List *
pull_ors(List * orlist)349 pull_ors(List *orlist)
350 {
351 	List	   *out_list = NIL;
352 	ListCell   *arg;
353 
354 	foreach(arg, orlist)
355 	{
356 		Node	   *subexpr = (Node *) lfirst(arg);
357 
358 		if (is_orclause(subexpr))
359 			out_list = list_concat(out_list,
360 								   pull_ors(((BoolExpr *) subexpr)->args));
361 		else
362 			out_list = lappend(out_list, subexpr);
363 	}
364 	return out_list;
365 }
366 
367 
368 /*--------------------
369  * The following code attempts to apply the inverse OR distributive law:
370  *		((A AND B) OR (A AND C))  =>  (A AND (B OR C))
371  * That is, locate OR clauses in which every subclause contains an
372  * identical term, and pull out the duplicated terms.
373  *
374  * This may seem like a fairly useless activity, but it turns out to be
375  * applicable to many machine-generated queries, and there are also queries
376  * in some of the TPC benchmarks that need it.  This was in fact almost the
377  * sole useful side-effect of the old prepqual code that tried to force
378  * the query into canonical AND-of-ORs form: the canonical equivalent of
379  *		((A AND B) OR (A AND C))
380  * is
381  *		((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
382  * which the code was able to simplify to
383  *		(A AND (A OR C) AND (B OR A) AND (B OR C))
384  * thus successfully extracting the common condition A --- but at the cost
385  * of cluttering the qual with many redundant clauses.
386  *--------------------
387  */
388 
389 /*
390  * find_duplicate_ors
391  *	  Given a qualification tree with the NOTs pushed down, search for
392  *	  OR clauses to which the inverse OR distributive law might apply.
393  *	  Only the top-level AND/OR structure is searched.
394  *
395  * While at it, we remove any NULL constants within the top-level AND/OR
396  * structure, eg in a WHERE clause, "x OR NULL::boolean" is reduced to "x".
397  * In general that would change the result, so eval_const_expressions can't
398  * do it; but at top level of WHERE, we don't need to distinguish between
399  * FALSE and NULL results, so it's valid to treat NULL::boolean the same
400  * as FALSE and then simplify AND/OR accordingly.  Conversely, in a top-level
401  * CHECK constraint, we may treat a NULL the same as TRUE.
402  *
403  * Returns the modified qualification.  AND/OR flatness is preserved.
404  */
405 static Expr *
find_duplicate_ors(Expr * qual,bool is_check)406 find_duplicate_ors(Expr *qual, bool is_check)
407 {
408 	if (is_orclause(qual))
409 	{
410 		List	   *orlist = NIL;
411 		ListCell   *temp;
412 
413 		/* Recurse */
414 		foreach(temp, ((BoolExpr *) qual)->args)
415 		{
416 			Expr	   *arg = (Expr *) lfirst(temp);
417 
418 			arg = find_duplicate_ors(arg, is_check);
419 
420 			/* Get rid of any constant inputs */
421 			if (arg && IsA(arg, Const))
422 			{
423 				Const	   *carg = (Const *) arg;
424 
425 				if (is_check)
426 				{
427 					/* Within OR in CHECK, drop constant FALSE */
428 					if (!carg->constisnull && !DatumGetBool(carg->constvalue))
429 						continue;
430 					/* Constant TRUE or NULL, so OR reduces to TRUE */
431 					return (Expr *) makeBoolConst(true, false);
432 				}
433 				else
434 				{
435 					/* Within OR in WHERE, drop constant FALSE or NULL */
436 					if (carg->constisnull || !DatumGetBool(carg->constvalue))
437 						continue;
438 					/* Constant TRUE, so OR reduces to TRUE */
439 					return arg;
440 				}
441 			}
442 
443 			orlist = lappend(orlist, arg);
444 		}
445 
446 		/* Flatten any ORs pulled up to just below here */
447 		orlist = pull_ors(orlist);
448 
449 		/* Now we can look for duplicate ORs */
450 		return process_duplicate_ors(orlist);
451 	}
452 	else if (is_andclause(qual))
453 	{
454 		List	   *andlist = NIL;
455 		ListCell   *temp;
456 
457 		/* Recurse */
458 		foreach(temp, ((BoolExpr *) qual)->args)
459 		{
460 			Expr	   *arg = (Expr *) lfirst(temp);
461 
462 			arg = find_duplicate_ors(arg, is_check);
463 
464 			/* Get rid of any constant inputs */
465 			if (arg && IsA(arg, Const))
466 			{
467 				Const	   *carg = (Const *) arg;
468 
469 				if (is_check)
470 				{
471 					/* Within AND in CHECK, drop constant TRUE or NULL */
472 					if (carg->constisnull || DatumGetBool(carg->constvalue))
473 						continue;
474 					/* Constant FALSE, so AND reduces to FALSE */
475 					return arg;
476 				}
477 				else
478 				{
479 					/* Within AND in WHERE, drop constant TRUE */
480 					if (!carg->constisnull && DatumGetBool(carg->constvalue))
481 						continue;
482 					/* Constant FALSE or NULL, so AND reduces to FALSE */
483 					return (Expr *) makeBoolConst(false, false);
484 				}
485 			}
486 
487 			andlist = lappend(andlist, arg);
488 		}
489 
490 		/* Flatten any ANDs introduced just below here */
491 		andlist = pull_ands(andlist);
492 
493 		/* AND of no inputs reduces to TRUE */
494 		if (andlist == NIL)
495 			return (Expr *) makeBoolConst(true, false);
496 
497 		/* Single-expression AND just reduces to that expression */
498 		if (list_length(andlist) == 1)
499 			return (Expr *) linitial(andlist);
500 
501 		/* Else we still need an AND node */
502 		return make_andclause(andlist);
503 	}
504 	else
505 		return qual;
506 }
507 
508 /*
509  * process_duplicate_ors
510  *	  Given a list of exprs which are ORed together, try to apply
511  *	  the inverse OR distributive law.
512  *
513  * Returns the resulting expression (could be an AND clause, an OR
514  * clause, or maybe even a single subexpression).
515  */
516 static Expr *
process_duplicate_ors(List * orlist)517 process_duplicate_ors(List *orlist)
518 {
519 	List	   *reference = NIL;
520 	int			num_subclauses = 0;
521 	List	   *winners;
522 	List	   *neworlist;
523 	ListCell   *temp;
524 
525 	/* OR of no inputs reduces to FALSE */
526 	if (orlist == NIL)
527 		return (Expr *) makeBoolConst(false, false);
528 
529 	/* Single-expression OR just reduces to that expression */
530 	if (list_length(orlist) == 1)
531 		return (Expr *) linitial(orlist);
532 
533 	/*
534 	 * Choose the shortest AND clause as the reference list --- obviously, any
535 	 * subclause not in this clause isn't in all the clauses. If we find a
536 	 * clause that's not an AND, we can treat it as a one-element AND clause,
537 	 * which necessarily wins as shortest.
538 	 */
539 	foreach(temp, orlist)
540 	{
541 		Expr	   *clause = (Expr *) lfirst(temp);
542 
543 		if (is_andclause(clause))
544 		{
545 			List	   *subclauses = ((BoolExpr *) clause)->args;
546 			int			nclauses = list_length(subclauses);
547 
548 			if (reference == NIL || nclauses < num_subclauses)
549 			{
550 				reference = subclauses;
551 				num_subclauses = nclauses;
552 			}
553 		}
554 		else
555 		{
556 			reference = list_make1(clause);
557 			break;
558 		}
559 	}
560 
561 	/*
562 	 * Just in case, eliminate any duplicates in the reference list.
563 	 */
564 	reference = list_union(NIL, reference);
565 
566 	/*
567 	 * Check each element of the reference list to see if it's in all the OR
568 	 * clauses.  Build a new list of winning clauses.
569 	 */
570 	winners = NIL;
571 	foreach(temp, reference)
572 	{
573 		Expr	   *refclause = (Expr *) lfirst(temp);
574 		bool		win = true;
575 		ListCell   *temp2;
576 
577 		foreach(temp2, orlist)
578 		{
579 			Expr	   *clause = (Expr *) lfirst(temp2);
580 
581 			if (is_andclause(clause))
582 			{
583 				if (!list_member(((BoolExpr *) clause)->args, refclause))
584 				{
585 					win = false;
586 					break;
587 				}
588 			}
589 			else
590 			{
591 				if (!equal(refclause, clause))
592 				{
593 					win = false;
594 					break;
595 				}
596 			}
597 		}
598 
599 		if (win)
600 			winners = lappend(winners, refclause);
601 	}
602 
603 	/*
604 	 * If no winners, we can't transform the OR
605 	 */
606 	if (winners == NIL)
607 		return make_orclause(orlist);
608 
609 	/*
610 	 * Generate new OR list consisting of the remaining sub-clauses.
611 	 *
612 	 * If any clause degenerates to empty, then we have a situation like (A
613 	 * AND B) OR (A), which can be reduced to just A --- that is, the
614 	 * additional conditions in other arms of the OR are irrelevant.
615 	 *
616 	 * Note that because we use list_difference, any multiple occurrences of a
617 	 * winning clause in an AND sub-clause will be removed automatically.
618 	 */
619 	neworlist = NIL;
620 	foreach(temp, orlist)
621 	{
622 		Expr	   *clause = (Expr *) lfirst(temp);
623 
624 		if (is_andclause(clause))
625 		{
626 			List	   *subclauses = ((BoolExpr *) clause)->args;
627 
628 			subclauses = list_difference(subclauses, winners);
629 			if (subclauses != NIL)
630 			{
631 				if (list_length(subclauses) == 1)
632 					neworlist = lappend(neworlist, linitial(subclauses));
633 				else
634 					neworlist = lappend(neworlist, make_andclause(subclauses));
635 			}
636 			else
637 			{
638 				neworlist = NIL;	/* degenerate case, see above */
639 				break;
640 			}
641 		}
642 		else
643 		{
644 			if (!list_member(winners, clause))
645 				neworlist = lappend(neworlist, clause);
646 			else
647 			{
648 				neworlist = NIL;	/* degenerate case, see above */
649 				break;
650 			}
651 		}
652 	}
653 
654 	/*
655 	 * Append reduced OR to the winners list, if it's not degenerate, handling
656 	 * the special case of one element correctly (can that really happen?).
657 	 * Also be careful to maintain AND/OR flatness in case we pulled up a
658 	 * sub-sub-OR-clause.
659 	 */
660 	if (neworlist != NIL)
661 	{
662 		if (list_length(neworlist) == 1)
663 			winners = lappend(winners, linitial(neworlist));
664 		else
665 			winners = lappend(winners, make_orclause(pull_ors(neworlist)));
666 	}
667 
668 	/*
669 	 * And return the constructed AND clause, again being wary of a single
670 	 * element and AND/OR flatness.
671 	 */
672 	if (list_length(winners) == 1)
673 		return (Expr *) linitial(winners);
674 	else
675 		return make_andclause(pull_ands(winners));
676 }
677