1 /*-------------------------------------------------------------------------
2  *
3  * prepqual.c
4  *	  Routines for preprocessing qualification expressions
5  *
6  *
7  * While the parser will produce flattened (N-argument) AND/OR trees from
8  * simple sequences of AND'ed or OR'ed clauses, there might be an AND clause
9  * directly underneath another AND, or OR underneath OR, if the input was
10  * oddly parenthesized.  Also, rule expansion and subquery flattening could
11  * produce such parsetrees.  The planner wants to flatten all such cases
12  * to ensure consistent optimization behavior.
13  *
14  * Formerly, this module was responsible for doing the initial flattening,
15  * but now we leave it to eval_const_expressions to do that since it has to
16  * make a complete pass over the expression tree anyway.  Instead, we just
17  * have to ensure that our manipulations preserve AND/OR flatness.
18  * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
19  * tree after local transformations that might introduce nested AND/ORs.
20  *
21  *
22  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
23  * Portions Copyright (c) 1994, Regents of the University of California
24  *
25  *
26  * IDENTIFICATION
27  *	  src/backend/optimizer/prep/prepqual.c
28  *
29  *-------------------------------------------------------------------------
30  */
31 
32 #include "postgres.h"
33 
34 #include "nodes/makefuncs.h"
35 #include "nodes/nodeFuncs.h"
36 #include "optimizer/optimizer.h"
37 #include "optimizer/prep.h"
38 #include "utils/lsyscache.h"
39 
40 
41 static List *pull_ands(List *andlist);
42 static List *pull_ors(List *orlist);
43 static Expr *find_duplicate_ors(Expr *qual, bool is_check);
44 static Expr *process_duplicate_ors(List *orlist);
45 
46 
47 /*
48  * negate_clause
49  *	  Negate a Boolean expression.
50  *
51  * Input is a clause to be negated (e.g., the argument of a NOT clause).
52  * Returns a new clause equivalent to the negation of the given clause.
53  *
54  * Although this can be invoked on its own, it's mainly intended as a helper
55  * for eval_const_expressions(), and that context drives several design
56  * decisions.  In particular, if the input is already AND/OR flat, we must
57  * preserve that property.  We also don't bother to recurse in situations
58  * where we can assume that lower-level executions of eval_const_expressions
59  * would already have simplified sub-clauses of the input.
60  *
61  * The difference between this and a simple make_notclause() is that this
62  * tries to get rid of the NOT node by logical simplification.  It's clearly
63  * always a win if the NOT node can be eliminated altogether.  However, our
64  * use of DeMorgan's laws could result in having more NOT nodes rather than
65  * fewer.  We do that unconditionally anyway, because in WHERE clauses it's
66  * important to expose as much top-level AND/OR structure as possible.
67  * Also, eliminating an intermediate NOT may allow us to flatten two levels
68  * of AND or OR together that we couldn't have otherwise.  Finally, one of
69  * the motivations for doing this is to ensure that logically equivalent
70  * expressions will be seen as physically equal(), so we should always apply
71  * the same transformations.
72  */
73 Node *
negate_clause(Node * node)74 negate_clause(Node *node)
75 {
76 	if (node == NULL)			/* should not happen */
77 		elog(ERROR, "can't negate an empty subexpression");
78 	switch (nodeTag(node))
79 	{
80 		case T_Const:
81 			{
82 				Const	   *c = (Const *) node;
83 
84 				/* NOT NULL is still NULL */
85 				if (c->constisnull)
86 					return makeBoolConst(false, true);
87 				/* otherwise pretty easy */
88 				return makeBoolConst(!DatumGetBool(c->constvalue), false);
89 			}
90 			break;
91 		case T_OpExpr:
92 			{
93 				/*
94 				 * Negate operator if possible: (NOT (< A B)) => (>= A B)
95 				 */
96 				OpExpr	   *opexpr = (OpExpr *) node;
97 				Oid			negator = get_negator(opexpr->opno);
98 
99 				if (negator)
100 				{
101 					OpExpr	   *newopexpr = makeNode(OpExpr);
102 
103 					newopexpr->opno = negator;
104 					newopexpr->opfuncid = InvalidOid;
105 					newopexpr->opresulttype = opexpr->opresulttype;
106 					newopexpr->opretset = opexpr->opretset;
107 					newopexpr->opcollid = opexpr->opcollid;
108 					newopexpr->inputcollid = opexpr->inputcollid;
109 					newopexpr->args = opexpr->args;
110 					newopexpr->location = opexpr->location;
111 					return (Node *) newopexpr;
112 				}
113 			}
114 			break;
115 		case T_ScalarArrayOpExpr:
116 			{
117 				/*
118 				 * Negate a ScalarArrayOpExpr if its operator has a negator;
119 				 * for example x = ANY (list) becomes x <> ALL (list)
120 				 */
121 				ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node;
122 				Oid			negator = get_negator(saopexpr->opno);
123 
124 				if (negator)
125 				{
126 					ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
127 
128 					newopexpr->opno = negator;
129 					newopexpr->opfuncid = InvalidOid;
130 					newopexpr->useOr = !saopexpr->useOr;
131 					newopexpr->inputcollid = saopexpr->inputcollid;
132 					newopexpr->args = saopexpr->args;
133 					newopexpr->location = saopexpr->location;
134 					return (Node *) newopexpr;
135 				}
136 			}
137 			break;
138 		case T_BoolExpr:
139 			{
140 				BoolExpr   *expr = (BoolExpr *) node;
141 
142 				switch (expr->boolop)
143 				{
144 						/*--------------------
145 						 * Apply DeMorgan's Laws:
146 						 *		(NOT (AND A B)) => (OR (NOT A) (NOT B))
147 						 *		(NOT (OR A B))	=> (AND (NOT A) (NOT B))
148 						 * i.e., swap AND for OR and negate each subclause.
149 						 *
150 						 * If the input is already AND/OR flat and has no NOT
151 						 * directly above AND or OR, this transformation preserves
152 						 * those properties.  For example, if no direct child of
153 						 * the given AND clause is an AND or a NOT-above-OR, then
154 						 * the recursive calls of negate_clause() can't return any
155 						 * OR clauses.  So we needn't call pull_ors() before
156 						 * building a new OR clause.  Similarly for the OR case.
157 						 *--------------------
158 						 */
159 					case AND_EXPR:
160 						{
161 							List	   *nargs = NIL;
162 							ListCell   *lc;
163 
164 							foreach(lc, expr->args)
165 							{
166 								nargs = lappend(nargs,
167 												negate_clause(lfirst(lc)));
168 							}
169 							return (Node *) make_orclause(nargs);
170 						}
171 						break;
172 					case OR_EXPR:
173 						{
174 							List	   *nargs = NIL;
175 							ListCell   *lc;
176 
177 							foreach(lc, expr->args)
178 							{
179 								nargs = lappend(nargs,
180 												negate_clause(lfirst(lc)));
181 							}
182 							return (Node *) make_andclause(nargs);
183 						}
184 						break;
185 					case NOT_EXPR:
186 
187 						/*
188 						 * NOT underneath NOT: they cancel.  We assume the
189 						 * input is already simplified, so no need to recurse.
190 						 */
191 						return (Node *) linitial(expr->args);
192 					default:
193 						elog(ERROR, "unrecognized boolop: %d",
194 							 (int) expr->boolop);
195 						break;
196 				}
197 			}
198 			break;
199 		case T_NullTest:
200 			{
201 				NullTest   *expr = (NullTest *) node;
202 
203 				/*
204 				 * In the rowtype case, the two flavors of NullTest are *not*
205 				 * logical inverses, so we can't simplify.  But it does work
206 				 * for scalar datatypes.
207 				 */
208 				if (!expr->argisrow)
209 				{
210 					NullTest   *newexpr = makeNode(NullTest);
211 
212 					newexpr->arg = expr->arg;
213 					newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
214 											 IS_NOT_NULL : IS_NULL);
215 					newexpr->argisrow = expr->argisrow;
216 					newexpr->location = expr->location;
217 					return (Node *) newexpr;
218 				}
219 			}
220 			break;
221 		case T_BooleanTest:
222 			{
223 				BooleanTest *expr = (BooleanTest *) node;
224 				BooleanTest *newexpr = makeNode(BooleanTest);
225 
226 				newexpr->arg = expr->arg;
227 				switch (expr->booltesttype)
228 				{
229 					case IS_TRUE:
230 						newexpr->booltesttype = IS_NOT_TRUE;
231 						break;
232 					case IS_NOT_TRUE:
233 						newexpr->booltesttype = IS_TRUE;
234 						break;
235 					case IS_FALSE:
236 						newexpr->booltesttype = IS_NOT_FALSE;
237 						break;
238 					case IS_NOT_FALSE:
239 						newexpr->booltesttype = IS_FALSE;
240 						break;
241 					case IS_UNKNOWN:
242 						newexpr->booltesttype = IS_NOT_UNKNOWN;
243 						break;
244 					case IS_NOT_UNKNOWN:
245 						newexpr->booltesttype = IS_UNKNOWN;
246 						break;
247 					default:
248 						elog(ERROR, "unrecognized booltesttype: %d",
249 							 (int) expr->booltesttype);
250 						break;
251 				}
252 				newexpr->location = expr->location;
253 				return (Node *) newexpr;
254 			}
255 			break;
256 		default:
257 			/* else fall through */
258 			break;
259 	}
260 
261 	/*
262 	 * Otherwise we don't know how to simplify this, so just tack on an
263 	 * explicit NOT node.
264 	 */
265 	return (Node *) make_notclause((Expr *) node);
266 }
267 
268 
269 /*
270  * canonicalize_qual
271  *	  Convert a qualification expression to the most useful form.
272  *
273  * This is primarily intended to be used on top-level WHERE (or JOIN/ON)
274  * clauses.  It can also be used on top-level CHECK constraints, for which
275  * pass is_check = true.  DO NOT call it on any expression that is not known
276  * to be one or the other, as it might apply inappropriate simplifications.
277  *
278  * The name of this routine is a holdover from a time when it would try to
279  * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
280  * Eventually, we recognized that that had more theoretical purity than
281  * actual usefulness, and so now the transformation doesn't involve any
282  * notion of reaching a canonical form.
283  *
284  * NOTE: we assume the input has already been through eval_const_expressions
285  * and therefore possesses AND/OR flatness.  Formerly this function included
286  * its own flattening logic, but that requires a useless extra pass over the
287  * tree.
288  *
289  * Returns the modified qualification.
290  */
291 Expr *
canonicalize_qual(Expr * qual,bool is_check)292 canonicalize_qual(Expr *qual, bool is_check)
293 {
294 	Expr	   *newqual;
295 
296 	/* Quick exit for empty qual */
297 	if (qual == NULL)
298 		return NULL;
299 
300 	/* This should not be invoked on quals in implicit-AND format */
301 	Assert(!IsA(qual, List));
302 
303 	/*
304 	 * Pull up redundant subclauses in OR-of-AND trees.  We do this only
305 	 * within the top-level AND/OR structure; there's no point in looking
306 	 * deeper.  Also remove any NULL constants in the top-level structure.
307 	 */
308 	newqual = find_duplicate_ors(qual, is_check);
309 
310 	return newqual;
311 }
312 
313 
314 /*
315  * pull_ands
316  *	  Recursively flatten nested AND clauses into a single and-clause list.
317  *
318  * Input is the arglist of an AND clause.
319  * Returns the rebuilt arglist (note original list structure is not touched).
320  */
321 static List *
pull_ands(List * andlist)322 pull_ands(List *andlist)
323 {
324 	List	   *out_list = NIL;
325 	ListCell   *arg;
326 
327 	foreach(arg, andlist)
328 	{
329 		Node	   *subexpr = (Node *) lfirst(arg);
330 
331 		if (is_andclause(subexpr))
332 			out_list = list_concat(out_list,
333 								   pull_ands(((BoolExpr *) subexpr)->args));
334 		else
335 			out_list = lappend(out_list, subexpr);
336 	}
337 	return out_list;
338 }
339 
340 /*
341  * pull_ors
342  *	  Recursively flatten nested OR clauses into a single or-clause list.
343  *
344  * Input is the arglist of an OR clause.
345  * Returns the rebuilt arglist (note original list structure is not touched).
346  */
347 static List *
pull_ors(List * orlist)348 pull_ors(List *orlist)
349 {
350 	List	   *out_list = NIL;
351 	ListCell   *arg;
352 
353 	foreach(arg, orlist)
354 	{
355 		Node	   *subexpr = (Node *) lfirst(arg);
356 
357 		if (is_orclause(subexpr))
358 			out_list = list_concat(out_list,
359 								   pull_ors(((BoolExpr *) subexpr)->args));
360 		else
361 			out_list = lappend(out_list, subexpr);
362 	}
363 	return out_list;
364 }
365 
366 
367 /*--------------------
368  * The following code attempts to apply the inverse OR distributive law:
369  *		((A AND B) OR (A AND C))  =>  (A AND (B OR C))
370  * That is, locate OR clauses in which every subclause contains an
371  * identical term, and pull out the duplicated terms.
372  *
373  * This may seem like a fairly useless activity, but it turns out to be
374  * applicable to many machine-generated queries, and there are also queries
375  * in some of the TPC benchmarks that need it.  This was in fact almost the
376  * sole useful side-effect of the old prepqual code that tried to force
377  * the query into canonical AND-of-ORs form: the canonical equivalent of
378  *		((A AND B) OR (A AND C))
379  * is
380  *		((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
381  * which the code was able to simplify to
382  *		(A AND (A OR C) AND (B OR A) AND (B OR C))
383  * thus successfully extracting the common condition A --- but at the cost
384  * of cluttering the qual with many redundant clauses.
385  *--------------------
386  */
387 
388 /*
389  * find_duplicate_ors
390  *	  Given a qualification tree with the NOTs pushed down, search for
391  *	  OR clauses to which the inverse OR distributive law might apply.
392  *	  Only the top-level AND/OR structure is searched.
393  *
394  * While at it, we remove any NULL constants within the top-level AND/OR
395  * structure, eg in a WHERE clause, "x OR NULL::boolean" is reduced to "x".
396  * In general that would change the result, so eval_const_expressions can't
397  * do it; but at top level of WHERE, we don't need to distinguish between
398  * FALSE and NULL results, so it's valid to treat NULL::boolean the same
399  * as FALSE and then simplify AND/OR accordingly.  Conversely, in a top-level
400  * CHECK constraint, we may treat a NULL the same as TRUE.
401  *
402  * Returns the modified qualification.  AND/OR flatness is preserved.
403  */
404 static Expr *
find_duplicate_ors(Expr * qual,bool is_check)405 find_duplicate_ors(Expr *qual, bool is_check)
406 {
407 	if (is_orclause(qual))
408 	{
409 		List	   *orlist = NIL;
410 		ListCell   *temp;
411 
412 		/* Recurse */
413 		foreach(temp, ((BoolExpr *) qual)->args)
414 		{
415 			Expr	   *arg = (Expr *) lfirst(temp);
416 
417 			arg = find_duplicate_ors(arg, is_check);
418 
419 			/* Get rid of any constant inputs */
420 			if (arg && IsA(arg, Const))
421 			{
422 				Const	   *carg = (Const *) arg;
423 
424 				if (is_check)
425 				{
426 					/* Within OR in CHECK, drop constant FALSE */
427 					if (!carg->constisnull && !DatumGetBool(carg->constvalue))
428 						continue;
429 					/* Constant TRUE or NULL, so OR reduces to TRUE */
430 					return (Expr *) makeBoolConst(true, false);
431 				}
432 				else
433 				{
434 					/* Within OR in WHERE, drop constant FALSE or NULL */
435 					if (carg->constisnull || !DatumGetBool(carg->constvalue))
436 						continue;
437 					/* Constant TRUE, so OR reduces to TRUE */
438 					return arg;
439 				}
440 			}
441 
442 			orlist = lappend(orlist, arg);
443 		}
444 
445 		/* Flatten any ORs pulled up to just below here */
446 		orlist = pull_ors(orlist);
447 
448 		/* Now we can look for duplicate ORs */
449 		return process_duplicate_ors(orlist);
450 	}
451 	else if (is_andclause(qual))
452 	{
453 		List	   *andlist = NIL;
454 		ListCell   *temp;
455 
456 		/* Recurse */
457 		foreach(temp, ((BoolExpr *) qual)->args)
458 		{
459 			Expr	   *arg = (Expr *) lfirst(temp);
460 
461 			arg = find_duplicate_ors(arg, is_check);
462 
463 			/* Get rid of any constant inputs */
464 			if (arg && IsA(arg, Const))
465 			{
466 				Const	   *carg = (Const *) arg;
467 
468 				if (is_check)
469 				{
470 					/* Within AND in CHECK, drop constant TRUE or NULL */
471 					if (carg->constisnull || DatumGetBool(carg->constvalue))
472 						continue;
473 					/* Constant FALSE, so AND reduces to FALSE */
474 					return arg;
475 				}
476 				else
477 				{
478 					/* Within AND in WHERE, drop constant TRUE */
479 					if (!carg->constisnull && DatumGetBool(carg->constvalue))
480 						continue;
481 					/* Constant FALSE or NULL, so AND reduces to FALSE */
482 					return (Expr *) makeBoolConst(false, false);
483 				}
484 			}
485 
486 			andlist = lappend(andlist, arg);
487 		}
488 
489 		/* Flatten any ANDs introduced just below here */
490 		andlist = pull_ands(andlist);
491 
492 		/* AND of no inputs reduces to TRUE */
493 		if (andlist == NIL)
494 			return (Expr *) makeBoolConst(true, false);
495 
496 		/* Single-expression AND just reduces to that expression */
497 		if (list_length(andlist) == 1)
498 			return (Expr *) linitial(andlist);
499 
500 		/* Else we still need an AND node */
501 		return make_andclause(andlist);
502 	}
503 	else
504 		return qual;
505 }
506 
507 /*
508  * process_duplicate_ors
509  *	  Given a list of exprs which are ORed together, try to apply
510  *	  the inverse OR distributive law.
511  *
512  * Returns the resulting expression (could be an AND clause, an OR
513  * clause, or maybe even a single subexpression).
514  */
515 static Expr *
process_duplicate_ors(List * orlist)516 process_duplicate_ors(List *orlist)
517 {
518 	List	   *reference = NIL;
519 	int			num_subclauses = 0;
520 	List	   *winners;
521 	List	   *neworlist;
522 	ListCell   *temp;
523 
524 	/* OR of no inputs reduces to FALSE */
525 	if (orlist == NIL)
526 		return (Expr *) makeBoolConst(false, false);
527 
528 	/* Single-expression OR just reduces to that expression */
529 	if (list_length(orlist) == 1)
530 		return (Expr *) linitial(orlist);
531 
532 	/*
533 	 * Choose the shortest AND clause as the reference list --- obviously, any
534 	 * subclause not in this clause isn't in all the clauses. If we find a
535 	 * clause that's not an AND, we can treat it as a one-element AND clause,
536 	 * which necessarily wins as shortest.
537 	 */
538 	foreach(temp, orlist)
539 	{
540 		Expr	   *clause = (Expr *) lfirst(temp);
541 
542 		if (is_andclause(clause))
543 		{
544 			List	   *subclauses = ((BoolExpr *) clause)->args;
545 			int			nclauses = list_length(subclauses);
546 
547 			if (reference == NIL || nclauses < num_subclauses)
548 			{
549 				reference = subclauses;
550 				num_subclauses = nclauses;
551 			}
552 		}
553 		else
554 		{
555 			reference = list_make1(clause);
556 			break;
557 		}
558 	}
559 
560 	/*
561 	 * Just in case, eliminate any duplicates in the reference list.
562 	 */
563 	reference = list_union(NIL, reference);
564 
565 	/*
566 	 * Check each element of the reference list to see if it's in all the OR
567 	 * clauses.  Build a new list of winning clauses.
568 	 */
569 	winners = NIL;
570 	foreach(temp, reference)
571 	{
572 		Expr	   *refclause = (Expr *) lfirst(temp);
573 		bool		win = true;
574 		ListCell   *temp2;
575 
576 		foreach(temp2, orlist)
577 		{
578 			Expr	   *clause = (Expr *) lfirst(temp2);
579 
580 			if (is_andclause(clause))
581 			{
582 				if (!list_member(((BoolExpr *) clause)->args, refclause))
583 				{
584 					win = false;
585 					break;
586 				}
587 			}
588 			else
589 			{
590 				if (!equal(refclause, clause))
591 				{
592 					win = false;
593 					break;
594 				}
595 			}
596 		}
597 
598 		if (win)
599 			winners = lappend(winners, refclause);
600 	}
601 
602 	/*
603 	 * If no winners, we can't transform the OR
604 	 */
605 	if (winners == NIL)
606 		return make_orclause(orlist);
607 
608 	/*
609 	 * Generate new OR list consisting of the remaining sub-clauses.
610 	 *
611 	 * If any clause degenerates to empty, then we have a situation like (A
612 	 * AND B) OR (A), which can be reduced to just A --- that is, the
613 	 * additional conditions in other arms of the OR are irrelevant.
614 	 *
615 	 * Note that because we use list_difference, any multiple occurrences of a
616 	 * winning clause in an AND sub-clause will be removed automatically.
617 	 */
618 	neworlist = NIL;
619 	foreach(temp, orlist)
620 	{
621 		Expr	   *clause = (Expr *) lfirst(temp);
622 
623 		if (is_andclause(clause))
624 		{
625 			List	   *subclauses = ((BoolExpr *) clause)->args;
626 
627 			subclauses = list_difference(subclauses, winners);
628 			if (subclauses != NIL)
629 			{
630 				if (list_length(subclauses) == 1)
631 					neworlist = lappend(neworlist, linitial(subclauses));
632 				else
633 					neworlist = lappend(neworlist, make_andclause(subclauses));
634 			}
635 			else
636 			{
637 				neworlist = NIL;	/* degenerate case, see above */
638 				break;
639 			}
640 		}
641 		else
642 		{
643 			if (!list_member(winners, clause))
644 				neworlist = lappend(neworlist, clause);
645 			else
646 			{
647 				neworlist = NIL;	/* degenerate case, see above */
648 				break;
649 			}
650 		}
651 	}
652 
653 	/*
654 	 * Append reduced OR to the winners list, if it's not degenerate, handling
655 	 * the special case of one element correctly (can that really happen?).
656 	 * Also be careful to maintain AND/OR flatness in case we pulled up a
657 	 * sub-sub-OR-clause.
658 	 */
659 	if (neworlist != NIL)
660 	{
661 		if (list_length(neworlist) == 1)
662 			winners = lappend(winners, linitial(neworlist));
663 		else
664 			winners = lappend(winners, make_orclause(pull_ors(neworlist)));
665 	}
666 
667 	/*
668 	 * And return the constructed AND clause, again being wary of a single
669 	 * element and AND/OR flatness.
670 	 */
671 	if (list_length(winners) == 1)
672 		return (Expr *) linitial(winners);
673 	else
674 		return make_andclause(pull_ands(winners));
675 }
676