1 /*-------------------------------------------------------------------------
2  *
3  * prepqual.c
4  *	  Routines for preprocessing qualification expressions
5  *
6  *
7  * While the parser will produce flattened (N-argument) AND/OR trees from
8  * simple sequences of AND'ed or OR'ed clauses, there might be an AND clause
9  * directly underneath another AND, or OR underneath OR, if the input was
10  * oddly parenthesized.  Also, rule expansion and subquery flattening could
11  * produce such parsetrees.  The planner wants to flatten all such cases
12  * to ensure consistent optimization behavior.
13  *
14  * Formerly, this module was responsible for doing the initial flattening,
15  * but now we leave it to eval_const_expressions to do that since it has to
16  * make a complete pass over the expression tree anyway.  Instead, we just
17  * have to ensure that our manipulations preserve AND/OR flatness.
18  * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
19  * tree after local transformations that might introduce nested AND/ORs.
20  *
21  *
22  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
23  * Portions Copyright (c) 1994, Regents of the University of California
24  *
25  *
26  * IDENTIFICATION
27  *	  src/backend/optimizer/prep/prepqual.c
28  *
29  *-------------------------------------------------------------------------
30  */
31 
32 #include "postgres.h"
33 
34 #include "nodes/makefuncs.h"
35 #include "nodes/nodeFuncs.h"
36 #include "optimizer/optimizer.h"
37 #include "optimizer/prep.h"
38 #include "utils/lsyscache.h"
39 
40 
41 static List *pull_ands(List *andlist);
42 static List *pull_ors(List *orlist);
43 static Expr *find_duplicate_ors(Expr *qual, bool is_check);
44 static Expr *process_duplicate_ors(List *orlist);
45 
46 
47 /*
48  * negate_clause
49  *	  Negate a Boolean expression.
50  *
51  * Input is a clause to be negated (e.g., the argument of a NOT clause).
52  * Returns a new clause equivalent to the negation of the given clause.
53  *
54  * Although this can be invoked on its own, it's mainly intended as a helper
55  * for eval_const_expressions(), and that context drives several design
56  * decisions.  In particular, if the input is already AND/OR flat, we must
57  * preserve that property.  We also don't bother to recurse in situations
58  * where we can assume that lower-level executions of eval_const_expressions
59  * would already have simplified sub-clauses of the input.
60  *
61  * The difference between this and a simple make_notclause() is that this
62  * tries to get rid of the NOT node by logical simplification.  It's clearly
63  * always a win if the NOT node can be eliminated altogether.  However, our
64  * use of DeMorgan's laws could result in having more NOT nodes rather than
65  * fewer.  We do that unconditionally anyway, because in WHERE clauses it's
66  * important to expose as much top-level AND/OR structure as possible.
67  * Also, eliminating an intermediate NOT may allow us to flatten two levels
68  * of AND or OR together that we couldn't have otherwise.  Finally, one of
69  * the motivations for doing this is to ensure that logically equivalent
70  * expressions will be seen as physically equal(), so we should always apply
71  * the same transformations.
72  */
73 Node *
negate_clause(Node * node)74 negate_clause(Node *node)
75 {
76 	if (node == NULL)			/* should not happen */
77 		elog(ERROR, "can't negate an empty subexpression");
78 	switch (nodeTag(node))
79 	{
80 		case T_Const:
81 			{
82 				Const	   *c = (Const *) node;
83 
84 				/* NOT NULL is still NULL */
85 				if (c->constisnull)
86 					return makeBoolConst(false, true);
87 				/* otherwise pretty easy */
88 				return makeBoolConst(!DatumGetBool(c->constvalue), false);
89 			}
90 			break;
91 		case T_OpExpr:
92 			{
93 				/*
94 				 * Negate operator if possible: (NOT (< A B)) => (>= A B)
95 				 */
96 				OpExpr	   *opexpr = (OpExpr *) node;
97 				Oid			negator = get_negator(opexpr->opno);
98 
99 				if (negator)
100 				{
101 					OpExpr	   *newopexpr = makeNode(OpExpr);
102 
103 					newopexpr->opno = negator;
104 					newopexpr->opfuncid = InvalidOid;
105 					newopexpr->opresulttype = opexpr->opresulttype;
106 					newopexpr->opretset = opexpr->opretset;
107 					newopexpr->opcollid = opexpr->opcollid;
108 					newopexpr->inputcollid = opexpr->inputcollid;
109 					newopexpr->args = opexpr->args;
110 					newopexpr->location = opexpr->location;
111 					return (Node *) newopexpr;
112 				}
113 			}
114 			break;
115 		case T_ScalarArrayOpExpr:
116 			{
117 				/*
118 				 * Negate a ScalarArrayOpExpr if its operator has a negator;
119 				 * for example x = ANY (list) becomes x <> ALL (list)
120 				 */
121 				ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node;
122 				Oid			negator = get_negator(saopexpr->opno);
123 
124 				if (negator)
125 				{
126 					ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
127 
128 					newopexpr->opno = negator;
129 					newopexpr->opfuncid = InvalidOid;
130 					newopexpr->useOr = !saopexpr->useOr;
131 					newopexpr->inputcollid = saopexpr->inputcollid;
132 					newopexpr->args = saopexpr->args;
133 					newopexpr->location = saopexpr->location;
134 					return (Node *) newopexpr;
135 				}
136 			}
137 			break;
138 		case T_BoolExpr:
139 			{
140 				BoolExpr   *expr = (BoolExpr *) node;
141 
142 				switch (expr->boolop)
143 				{
144 						/*--------------------
145 						 * Apply DeMorgan's Laws:
146 						 *		(NOT (AND A B)) => (OR (NOT A) (NOT B))
147 						 *		(NOT (OR A B))	=> (AND (NOT A) (NOT B))
148 						 * i.e., swap AND for OR and negate each subclause.
149 						 *
150 						 * If the input is already AND/OR flat and has no NOT
151 						 * directly above AND or OR, this transformation preserves
152 						 * those properties.  For example, if no direct child of
153 						 * the given AND clause is an AND or a NOT-above-OR, then
154 						 * the recursive calls of negate_clause() can't return any
155 						 * OR clauses.  So we needn't call pull_ors() before
156 						 * building a new OR clause.  Similarly for the OR case.
157 						 *--------------------
158 						 */
159 					case AND_EXPR:
160 						{
161 							List	   *nargs = NIL;
162 							ListCell   *lc;
163 
164 							foreach(lc, expr->args)
165 							{
166 								nargs = lappend(nargs,
167 												negate_clause(lfirst(lc)));
168 							}
169 							return (Node *) make_orclause(nargs);
170 						}
171 						break;
172 					case OR_EXPR:
173 						{
174 							List	   *nargs = NIL;
175 							ListCell   *lc;
176 
177 							foreach(lc, expr->args)
178 							{
179 								nargs = lappend(nargs,
180 												negate_clause(lfirst(lc)));
181 							}
182 							return (Node *) make_andclause(nargs);
183 						}
184 						break;
185 					case NOT_EXPR:
186 
187 						/*
188 						 * NOT underneath NOT: they cancel.  We assume the
189 						 * input is already simplified, so no need to recurse.
190 						 */
191 						return (Node *) linitial(expr->args);
192 					default:
193 						elog(ERROR, "unrecognized boolop: %d",
194 							 (int) expr->boolop);
195 						break;
196 				}
197 			}
198 			break;
199 		case T_NullTest:
200 			{
201 				NullTest   *expr = (NullTest *) node;
202 
203 				/*
204 				 * In the rowtype case, the two flavors of NullTest are *not*
205 				 * logical inverses, so we can't simplify.  But it does work
206 				 * for scalar datatypes.
207 				 */
208 				if (!expr->argisrow)
209 				{
210 					NullTest   *newexpr = makeNode(NullTest);
211 
212 					newexpr->arg = expr->arg;
213 					newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
214 											 IS_NOT_NULL : IS_NULL);
215 					newexpr->argisrow = expr->argisrow;
216 					newexpr->location = expr->location;
217 					return (Node *) newexpr;
218 				}
219 			}
220 			break;
221 		case T_BooleanTest:
222 			{
223 				BooleanTest *expr = (BooleanTest *) node;
224 				BooleanTest *newexpr = makeNode(BooleanTest);
225 
226 				newexpr->arg = expr->arg;
227 				switch (expr->booltesttype)
228 				{
229 					case IS_TRUE:
230 						newexpr->booltesttype = IS_NOT_TRUE;
231 						break;
232 					case IS_NOT_TRUE:
233 						newexpr->booltesttype = IS_TRUE;
234 						break;
235 					case IS_FALSE:
236 						newexpr->booltesttype = IS_NOT_FALSE;
237 						break;
238 					case IS_NOT_FALSE:
239 						newexpr->booltesttype = IS_FALSE;
240 						break;
241 					case IS_UNKNOWN:
242 						newexpr->booltesttype = IS_NOT_UNKNOWN;
243 						break;
244 					case IS_NOT_UNKNOWN:
245 						newexpr->booltesttype = IS_UNKNOWN;
246 						break;
247 					default:
248 						elog(ERROR, "unrecognized booltesttype: %d",
249 							 (int) expr->booltesttype);
250 						break;
251 				}
252 				newexpr->location = expr->location;
253 				return (Node *) newexpr;
254 			}
255 			break;
256 		default:
257 			/* else fall through */
258 			break;
259 	}
260 
261 	/*
262 	 * Otherwise we don't know how to simplify this, so just tack on an
263 	 * explicit NOT node.
264 	 */
265 	return (Node *) make_notclause((Expr *) node);
266 }
267 
268 
269 /*
270  * canonicalize_qual
271  *	  Convert a qualification expression to the most useful form.
272  *
273  * This is primarily intended to be used on top-level WHERE (or JOIN/ON)
274  * clauses.  It can also be used on top-level CHECK constraints, for which
275  * pass is_check = true.  DO NOT call it on any expression that is not known
276  * to be one or the other, as it might apply inappropriate simplifications.
277  *
278  * The name of this routine is a holdover from a time when it would try to
279  * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
280  * Eventually, we recognized that that had more theoretical purity than
281  * actual usefulness, and so now the transformation doesn't involve any
282  * notion of reaching a canonical form.
283  *
284  * NOTE: we assume the input has already been through eval_const_expressions
285  * and therefore possesses AND/OR flatness.  Formerly this function included
286  * its own flattening logic, but that requires a useless extra pass over the
287  * tree.
288  *
289  * Returns the modified qualification.
290  */
291 Expr *
canonicalize_qual(Expr * qual,bool is_check)292 canonicalize_qual(Expr *qual, bool is_check)
293 {
294 	Expr	   *newqual;
295 
296 	/* Quick exit for empty qual */
297 	if (qual == NULL)
298 		return NULL;
299 
300 	/* This should not be invoked on quals in implicit-AND format */
301 	Assert(!IsA(qual, List));
302 
303 	/*
304 	 * Pull up redundant subclauses in OR-of-AND trees.  We do this only
305 	 * within the top-level AND/OR structure; there's no point in looking
306 	 * deeper.  Also remove any NULL constants in the top-level structure.
307 	 */
308 	newqual = find_duplicate_ors(qual, is_check);
309 
310 	return newqual;
311 }
312 
313 
314 /*
315  * pull_ands
316  *	  Recursively flatten nested AND clauses into a single and-clause list.
317  *
318  * Input is the arglist of an AND clause.
319  * Returns the rebuilt arglist (note original list structure is not touched).
320  */
321 static List *
pull_ands(List * andlist)322 pull_ands(List *andlist)
323 {
324 	List	   *out_list = NIL;
325 	ListCell   *arg;
326 
327 	foreach(arg, andlist)
328 	{
329 		Node	   *subexpr = (Node *) lfirst(arg);
330 
331 		/*
332 		 * Note: we can destructively concat the subexpression's arglist
333 		 * because we know the recursive invocation of pull_ands will have
334 		 * built a new arglist not shared with any other expr. Otherwise we'd
335 		 * need a list_copy here.
336 		 */
337 		if (is_andclause(subexpr))
338 			out_list = list_concat(out_list,
339 								   pull_ands(((BoolExpr *) subexpr)->args));
340 		else
341 			out_list = lappend(out_list, subexpr);
342 	}
343 	return out_list;
344 }
345 
346 /*
347  * pull_ors
348  *	  Recursively flatten nested OR clauses into a single or-clause list.
349  *
350  * Input is the arglist of an OR clause.
351  * Returns the rebuilt arglist (note original list structure is not touched).
352  */
353 static List *
pull_ors(List * orlist)354 pull_ors(List *orlist)
355 {
356 	List	   *out_list = NIL;
357 	ListCell   *arg;
358 
359 	foreach(arg, orlist)
360 	{
361 		Node	   *subexpr = (Node *) lfirst(arg);
362 
363 		/*
364 		 * Note: we can destructively concat the subexpression's arglist
365 		 * because we know the recursive invocation of pull_ors will have
366 		 * built a new arglist not shared with any other expr. Otherwise we'd
367 		 * need a list_copy here.
368 		 */
369 		if (is_orclause(subexpr))
370 			out_list = list_concat(out_list,
371 								   pull_ors(((BoolExpr *) subexpr)->args));
372 		else
373 			out_list = lappend(out_list, subexpr);
374 	}
375 	return out_list;
376 }
377 
378 
379 /*--------------------
380  * The following code attempts to apply the inverse OR distributive law:
381  *		((A AND B) OR (A AND C))  =>  (A AND (B OR C))
382  * That is, locate OR clauses in which every subclause contains an
383  * identical term, and pull out the duplicated terms.
384  *
385  * This may seem like a fairly useless activity, but it turns out to be
386  * applicable to many machine-generated queries, and there are also queries
387  * in some of the TPC benchmarks that need it.  This was in fact almost the
388  * sole useful side-effect of the old prepqual code that tried to force
389  * the query into canonical AND-of-ORs form: the canonical equivalent of
390  *		((A AND B) OR (A AND C))
391  * is
392  *		((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
393  * which the code was able to simplify to
394  *		(A AND (A OR C) AND (B OR A) AND (B OR C))
395  * thus successfully extracting the common condition A --- but at the cost
396  * of cluttering the qual with many redundant clauses.
397  *--------------------
398  */
399 
400 /*
401  * find_duplicate_ors
402  *	  Given a qualification tree with the NOTs pushed down, search for
403  *	  OR clauses to which the inverse OR distributive law might apply.
404  *	  Only the top-level AND/OR structure is searched.
405  *
406  * While at it, we remove any NULL constants within the top-level AND/OR
407  * structure, eg in a WHERE clause, "x OR NULL::boolean" is reduced to "x".
408  * In general that would change the result, so eval_const_expressions can't
409  * do it; but at top level of WHERE, we don't need to distinguish between
410  * FALSE and NULL results, so it's valid to treat NULL::boolean the same
411  * as FALSE and then simplify AND/OR accordingly.  Conversely, in a top-level
412  * CHECK constraint, we may treat a NULL the same as TRUE.
413  *
414  * Returns the modified qualification.  AND/OR flatness is preserved.
415  */
416 static Expr *
find_duplicate_ors(Expr * qual,bool is_check)417 find_duplicate_ors(Expr *qual, bool is_check)
418 {
419 	if (is_orclause(qual))
420 	{
421 		List	   *orlist = NIL;
422 		ListCell   *temp;
423 
424 		/* Recurse */
425 		foreach(temp, ((BoolExpr *) qual)->args)
426 		{
427 			Expr	   *arg = (Expr *) lfirst(temp);
428 
429 			arg = find_duplicate_ors(arg, is_check);
430 
431 			/* Get rid of any constant inputs */
432 			if (arg && IsA(arg, Const))
433 			{
434 				Const	   *carg = (Const *) arg;
435 
436 				if (is_check)
437 				{
438 					/* Within OR in CHECK, drop constant FALSE */
439 					if (!carg->constisnull && !DatumGetBool(carg->constvalue))
440 						continue;
441 					/* Constant TRUE or NULL, so OR reduces to TRUE */
442 					return (Expr *) makeBoolConst(true, false);
443 				}
444 				else
445 				{
446 					/* Within OR in WHERE, drop constant FALSE or NULL */
447 					if (carg->constisnull || !DatumGetBool(carg->constvalue))
448 						continue;
449 					/* Constant TRUE, so OR reduces to TRUE */
450 					return arg;
451 				}
452 			}
453 
454 			orlist = lappend(orlist, arg);
455 		}
456 
457 		/* Flatten any ORs pulled up to just below here */
458 		orlist = pull_ors(orlist);
459 
460 		/* Now we can look for duplicate ORs */
461 		return process_duplicate_ors(orlist);
462 	}
463 	else if (is_andclause(qual))
464 	{
465 		List	   *andlist = NIL;
466 		ListCell   *temp;
467 
468 		/* Recurse */
469 		foreach(temp, ((BoolExpr *) qual)->args)
470 		{
471 			Expr	   *arg = (Expr *) lfirst(temp);
472 
473 			arg = find_duplicate_ors(arg, is_check);
474 
475 			/* Get rid of any constant inputs */
476 			if (arg && IsA(arg, Const))
477 			{
478 				Const	   *carg = (Const *) arg;
479 
480 				if (is_check)
481 				{
482 					/* Within AND in CHECK, drop constant TRUE or NULL */
483 					if (carg->constisnull || DatumGetBool(carg->constvalue))
484 						continue;
485 					/* Constant FALSE, so AND reduces to FALSE */
486 					return arg;
487 				}
488 				else
489 				{
490 					/* Within AND in WHERE, drop constant TRUE */
491 					if (!carg->constisnull && DatumGetBool(carg->constvalue))
492 						continue;
493 					/* Constant FALSE or NULL, so AND reduces to FALSE */
494 					return (Expr *) makeBoolConst(false, false);
495 				}
496 			}
497 
498 			andlist = lappend(andlist, arg);
499 		}
500 
501 		/* Flatten any ANDs introduced just below here */
502 		andlist = pull_ands(andlist);
503 
504 		/* AND of no inputs reduces to TRUE */
505 		if (andlist == NIL)
506 			return (Expr *) makeBoolConst(true, false);
507 
508 		/* Single-expression AND just reduces to that expression */
509 		if (list_length(andlist) == 1)
510 			return (Expr *) linitial(andlist);
511 
512 		/* Else we still need an AND node */
513 		return make_andclause(andlist);
514 	}
515 	else
516 		return qual;
517 }
518 
519 /*
520  * process_duplicate_ors
521  *	  Given a list of exprs which are ORed together, try to apply
522  *	  the inverse OR distributive law.
523  *
524  * Returns the resulting expression (could be an AND clause, an OR
525  * clause, or maybe even a single subexpression).
526  */
527 static Expr *
process_duplicate_ors(List * orlist)528 process_duplicate_ors(List *orlist)
529 {
530 	List	   *reference = NIL;
531 	int			num_subclauses = 0;
532 	List	   *winners;
533 	List	   *neworlist;
534 	ListCell   *temp;
535 
536 	/* OR of no inputs reduces to FALSE */
537 	if (orlist == NIL)
538 		return (Expr *) makeBoolConst(false, false);
539 
540 	/* Single-expression OR just reduces to that expression */
541 	if (list_length(orlist) == 1)
542 		return (Expr *) linitial(orlist);
543 
544 	/*
545 	 * Choose the shortest AND clause as the reference list --- obviously, any
546 	 * subclause not in this clause isn't in all the clauses. If we find a
547 	 * clause that's not an AND, we can treat it as a one-element AND clause,
548 	 * which necessarily wins as shortest.
549 	 */
550 	foreach(temp, orlist)
551 	{
552 		Expr	   *clause = (Expr *) lfirst(temp);
553 
554 		if (is_andclause(clause))
555 		{
556 			List	   *subclauses = ((BoolExpr *) clause)->args;
557 			int			nclauses = list_length(subclauses);
558 
559 			if (reference == NIL || nclauses < num_subclauses)
560 			{
561 				reference = subclauses;
562 				num_subclauses = nclauses;
563 			}
564 		}
565 		else
566 		{
567 			reference = list_make1(clause);
568 			break;
569 		}
570 	}
571 
572 	/*
573 	 * Just in case, eliminate any duplicates in the reference list.
574 	 */
575 	reference = list_union(NIL, reference);
576 
577 	/*
578 	 * Check each element of the reference list to see if it's in all the OR
579 	 * clauses.  Build a new list of winning clauses.
580 	 */
581 	winners = NIL;
582 	foreach(temp, reference)
583 	{
584 		Expr	   *refclause = (Expr *) lfirst(temp);
585 		bool		win = true;
586 		ListCell   *temp2;
587 
588 		foreach(temp2, orlist)
589 		{
590 			Expr	   *clause = (Expr *) lfirst(temp2);
591 
592 			if (is_andclause(clause))
593 			{
594 				if (!list_member(((BoolExpr *) clause)->args, refclause))
595 				{
596 					win = false;
597 					break;
598 				}
599 			}
600 			else
601 			{
602 				if (!equal(refclause, clause))
603 				{
604 					win = false;
605 					break;
606 				}
607 			}
608 		}
609 
610 		if (win)
611 			winners = lappend(winners, refclause);
612 	}
613 
614 	/*
615 	 * If no winners, we can't transform the OR
616 	 */
617 	if (winners == NIL)
618 		return make_orclause(orlist);
619 
620 	/*
621 	 * Generate new OR list consisting of the remaining sub-clauses.
622 	 *
623 	 * If any clause degenerates to empty, then we have a situation like (A
624 	 * AND B) OR (A), which can be reduced to just A --- that is, the
625 	 * additional conditions in other arms of the OR are irrelevant.
626 	 *
627 	 * Note that because we use list_difference, any multiple occurrences of a
628 	 * winning clause in an AND sub-clause will be removed automatically.
629 	 */
630 	neworlist = NIL;
631 	foreach(temp, orlist)
632 	{
633 		Expr	   *clause = (Expr *) lfirst(temp);
634 
635 		if (is_andclause(clause))
636 		{
637 			List	   *subclauses = ((BoolExpr *) clause)->args;
638 
639 			subclauses = list_difference(subclauses, winners);
640 			if (subclauses != NIL)
641 			{
642 				if (list_length(subclauses) == 1)
643 					neworlist = lappend(neworlist, linitial(subclauses));
644 				else
645 					neworlist = lappend(neworlist, make_andclause(subclauses));
646 			}
647 			else
648 			{
649 				neworlist = NIL;	/* degenerate case, see above */
650 				break;
651 			}
652 		}
653 		else
654 		{
655 			if (!list_member(winners, clause))
656 				neworlist = lappend(neworlist, clause);
657 			else
658 			{
659 				neworlist = NIL;	/* degenerate case, see above */
660 				break;
661 			}
662 		}
663 	}
664 
665 	/*
666 	 * Append reduced OR to the winners list, if it's not degenerate, handling
667 	 * the special case of one element correctly (can that really happen?).
668 	 * Also be careful to maintain AND/OR flatness in case we pulled up a
669 	 * sub-sub-OR-clause.
670 	 */
671 	if (neworlist != NIL)
672 	{
673 		if (list_length(neworlist) == 1)
674 			winners = lappend(winners, linitial(neworlist));
675 		else
676 			winners = lappend(winners, make_orclause(pull_ors(neworlist)));
677 	}
678 
679 	/*
680 	 * And return the constructed AND clause, again being wary of a single
681 	 * element and AND/OR flatness.
682 	 */
683 	if (list_length(winners) == 1)
684 		return (Expr *) linitial(winners);
685 	else
686 		return make_andclause(pull_ands(winners));
687 }
688