1 /*-------------------------------------------------------------------------
2  *
3  * prepqual.c
4  *	  Routines for preprocessing qualification expressions
5  *
6  *
7  * While the parser will produce flattened (N-argument) AND/OR trees from
8  * simple sequences of AND'ed or OR'ed clauses, there might be an AND clause
9  * directly underneath another AND, or OR underneath OR, if the input was
10  * oddly parenthesized.  Also, rule expansion and subquery flattening could
11  * produce such parsetrees.  The planner wants to flatten all such cases
12  * to ensure consistent optimization behavior.
13  *
14  * Formerly, this module was responsible for doing the initial flattening,
15  * but now we leave it to eval_const_expressions to do that since it has to
16  * make a complete pass over the expression tree anyway.  Instead, we just
17  * have to ensure that our manipulations preserve AND/OR flatness.
18  * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
19  * tree after local transformations that might introduce nested AND/ORs.
20  *
21  *
22  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
23  * Portions Copyright (c) 1994, Regents of the University of California
24  *
25  *
26  * IDENTIFICATION
27  *	  src/backend/optimizer/prep/prepqual.c
28  *
29  *-------------------------------------------------------------------------
30  */
31 
32 #include "postgres.h"
33 
34 #include "nodes/makefuncs.h"
35 #include "optimizer/clauses.h"
36 #include "optimizer/prep.h"
37 #include "utils/lsyscache.h"
38 
39 
40 static List *pull_ands(List *andlist);
41 static List *pull_ors(List *orlist);
42 static Expr *find_duplicate_ors(Expr *qual, bool is_check);
43 static Expr *process_duplicate_ors(List *orlist);
44 
45 
46 /*
47  * negate_clause
48  *	  Negate a Boolean expression.
49  *
50  * Input is a clause to be negated (e.g., the argument of a NOT clause).
51  * Returns a new clause equivalent to the negation of the given clause.
52  *
53  * Although this can be invoked on its own, it's mainly intended as a helper
54  * for eval_const_expressions(), and that context drives several design
55  * decisions.  In particular, if the input is already AND/OR flat, we must
56  * preserve that property.  We also don't bother to recurse in situations
57  * where we can assume that lower-level executions of eval_const_expressions
58  * would already have simplified sub-clauses of the input.
59  *
60  * The difference between this and a simple make_notclause() is that this
61  * tries to get rid of the NOT node by logical simplification.  It's clearly
62  * always a win if the NOT node can be eliminated altogether.  However, our
63  * use of DeMorgan's laws could result in having more NOT nodes rather than
64  * fewer.  We do that unconditionally anyway, because in WHERE clauses it's
65  * important to expose as much top-level AND/OR structure as possible.
66  * Also, eliminating an intermediate NOT may allow us to flatten two levels
67  * of AND or OR together that we couldn't have otherwise.  Finally, one of
68  * the motivations for doing this is to ensure that logically equivalent
69  * expressions will be seen as physically equal(), so we should always apply
70  * the same transformations.
71  */
72 Node *
negate_clause(Node * node)73 negate_clause(Node *node)
74 {
75 	if (node == NULL)			/* should not happen */
76 		elog(ERROR, "can't negate an empty subexpression");
77 	switch (nodeTag(node))
78 	{
79 		case T_Const:
80 			{
81 				Const	   *c = (Const *) node;
82 
83 				/* NOT NULL is still NULL */
84 				if (c->constisnull)
85 					return makeBoolConst(false, true);
86 				/* otherwise pretty easy */
87 				return makeBoolConst(!DatumGetBool(c->constvalue), false);
88 			}
89 			break;
90 		case T_OpExpr:
91 			{
92 				/*
93 				 * Negate operator if possible: (NOT (< A B)) => (>= A B)
94 				 */
95 				OpExpr	   *opexpr = (OpExpr *) node;
96 				Oid			negator = get_negator(opexpr->opno);
97 
98 				if (negator)
99 				{
100 					OpExpr	   *newopexpr = makeNode(OpExpr);
101 
102 					newopexpr->opno = negator;
103 					newopexpr->opfuncid = InvalidOid;
104 					newopexpr->opresulttype = opexpr->opresulttype;
105 					newopexpr->opretset = opexpr->opretset;
106 					newopexpr->opcollid = opexpr->opcollid;
107 					newopexpr->inputcollid = opexpr->inputcollid;
108 					newopexpr->args = opexpr->args;
109 					newopexpr->location = opexpr->location;
110 					return (Node *) newopexpr;
111 				}
112 			}
113 			break;
114 		case T_ScalarArrayOpExpr:
115 			{
116 				/*
117 				 * Negate a ScalarArrayOpExpr if its operator has a negator;
118 				 * for example x = ANY (list) becomes x <> ALL (list)
119 				 */
120 				ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node;
121 				Oid			negator = get_negator(saopexpr->opno);
122 
123 				if (negator)
124 				{
125 					ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
126 
127 					newopexpr->opno = negator;
128 					newopexpr->opfuncid = InvalidOid;
129 					newopexpr->useOr = !saopexpr->useOr;
130 					newopexpr->inputcollid = saopexpr->inputcollid;
131 					newopexpr->args = saopexpr->args;
132 					newopexpr->location = saopexpr->location;
133 					return (Node *) newopexpr;
134 				}
135 			}
136 			break;
137 		case T_BoolExpr:
138 			{
139 				BoolExpr   *expr = (BoolExpr *) node;
140 
141 				switch (expr->boolop)
142 				{
143 						/*--------------------
144 						 * Apply DeMorgan's Laws:
145 						 *		(NOT (AND A B)) => (OR (NOT A) (NOT B))
146 						 *		(NOT (OR A B))	=> (AND (NOT A) (NOT B))
147 						 * i.e., swap AND for OR and negate each subclause.
148 						 *
149 						 * If the input is already AND/OR flat and has no NOT
150 						 * directly above AND or OR, this transformation preserves
151 						 * those properties.  For example, if no direct child of
152 						 * the given AND clause is an AND or a NOT-above-OR, then
153 						 * the recursive calls of negate_clause() can't return any
154 						 * OR clauses.  So we needn't call pull_ors() before
155 						 * building a new OR clause.  Similarly for the OR case.
156 						 *--------------------
157 						 */
158 					case AND_EXPR:
159 						{
160 							List	   *nargs = NIL;
161 							ListCell   *lc;
162 
163 							foreach(lc, expr->args)
164 							{
165 								nargs = lappend(nargs,
166 												negate_clause(lfirst(lc)));
167 							}
168 							return (Node *) make_orclause(nargs);
169 						}
170 						break;
171 					case OR_EXPR:
172 						{
173 							List	   *nargs = NIL;
174 							ListCell   *lc;
175 
176 							foreach(lc, expr->args)
177 							{
178 								nargs = lappend(nargs,
179 												negate_clause(lfirst(lc)));
180 							}
181 							return (Node *) make_andclause(nargs);
182 						}
183 						break;
184 					case NOT_EXPR:
185 
186 						/*
187 						 * NOT underneath NOT: they cancel.  We assume the
188 						 * input is already simplified, so no need to recurse.
189 						 */
190 						return (Node *) linitial(expr->args);
191 					default:
192 						elog(ERROR, "unrecognized boolop: %d",
193 							 (int) expr->boolop);
194 						break;
195 				}
196 			}
197 			break;
198 		case T_NullTest:
199 			{
200 				NullTest   *expr = (NullTest *) node;
201 
202 				/*
203 				 * In the rowtype case, the two flavors of NullTest are *not*
204 				 * logical inverses, so we can't simplify.  But it does work
205 				 * for scalar datatypes.
206 				 */
207 				if (!expr->argisrow)
208 				{
209 					NullTest   *newexpr = makeNode(NullTest);
210 
211 					newexpr->arg = expr->arg;
212 					newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
213 											 IS_NOT_NULL : IS_NULL);
214 					newexpr->argisrow = expr->argisrow;
215 					newexpr->location = expr->location;
216 					return (Node *) newexpr;
217 				}
218 			}
219 			break;
220 		case T_BooleanTest:
221 			{
222 				BooleanTest *expr = (BooleanTest *) node;
223 				BooleanTest *newexpr = makeNode(BooleanTest);
224 
225 				newexpr->arg = expr->arg;
226 				switch (expr->booltesttype)
227 				{
228 					case IS_TRUE:
229 						newexpr->booltesttype = IS_NOT_TRUE;
230 						break;
231 					case IS_NOT_TRUE:
232 						newexpr->booltesttype = IS_TRUE;
233 						break;
234 					case IS_FALSE:
235 						newexpr->booltesttype = IS_NOT_FALSE;
236 						break;
237 					case IS_NOT_FALSE:
238 						newexpr->booltesttype = IS_FALSE;
239 						break;
240 					case IS_UNKNOWN:
241 						newexpr->booltesttype = IS_NOT_UNKNOWN;
242 						break;
243 					case IS_NOT_UNKNOWN:
244 						newexpr->booltesttype = IS_UNKNOWN;
245 						break;
246 					default:
247 						elog(ERROR, "unrecognized booltesttype: %d",
248 							 (int) expr->booltesttype);
249 						break;
250 				}
251 				newexpr->location = expr->location;
252 				return (Node *) newexpr;
253 			}
254 			break;
255 		default:
256 			/* else fall through */
257 			break;
258 	}
259 
260 	/*
261 	 * Otherwise we don't know how to simplify this, so just tack on an
262 	 * explicit NOT node.
263 	 */
264 	return (Node *) make_notclause((Expr *) node);
265 }
266 
267 
268 /*
269  * canonicalize_qual
270  *	  Convert a qualification expression to the most useful form.
271  *
272  * This is primarily intended to be used on top-level WHERE (or JOIN/ON)
273  * clauses.  It can also be used on top-level CHECK constraints, for which
274  * pass is_check = true.  DO NOT call it on any expression that is not known
275  * to be one or the other, as it might apply inappropriate simplifications.
276  *
277  * The name of this routine is a holdover from a time when it would try to
278  * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
279  * Eventually, we recognized that that had more theoretical purity than
280  * actual usefulness, and so now the transformation doesn't involve any
281  * notion of reaching a canonical form.
282  *
283  * NOTE: we assume the input has already been through eval_const_expressions
284  * and therefore possesses AND/OR flatness.  Formerly this function included
285  * its own flattening logic, but that requires a useless extra pass over the
286  * tree.
287  *
288  * Returns the modified qualification.
289  */
290 Expr *
canonicalize_qual(Expr * qual,bool is_check)291 canonicalize_qual(Expr *qual, bool is_check)
292 {
293 	Expr	   *newqual;
294 
295 	/* Quick exit for empty qual */
296 	if (qual == NULL)
297 		return NULL;
298 
299 	/* This should not be invoked on quals in implicit-AND format */
300 	Assert(!IsA(qual, List));
301 
302 	/*
303 	 * Pull up redundant subclauses in OR-of-AND trees.  We do this only
304 	 * within the top-level AND/OR structure; there's no point in looking
305 	 * deeper.  Also remove any NULL constants in the top-level structure.
306 	 */
307 	newqual = find_duplicate_ors(qual, is_check);
308 
309 	return newqual;
310 }
311 
312 
313 /*
314  * pull_ands
315  *	  Recursively flatten nested AND clauses into a single and-clause list.
316  *
317  * Input is the arglist of an AND clause.
318  * Returns the rebuilt arglist (note original list structure is not touched).
319  */
320 static List *
pull_ands(List * andlist)321 pull_ands(List *andlist)
322 {
323 	List	   *out_list = NIL;
324 	ListCell   *arg;
325 
326 	foreach(arg, andlist)
327 	{
328 		Node	   *subexpr = (Node *) lfirst(arg);
329 
330 		/*
331 		 * Note: we can destructively concat the subexpression's arglist
332 		 * because we know the recursive invocation of pull_ands will have
333 		 * built a new arglist not shared with any other expr. Otherwise we'd
334 		 * need a list_copy here.
335 		 */
336 		if (and_clause(subexpr))
337 			out_list = list_concat(out_list,
338 								   pull_ands(((BoolExpr *) subexpr)->args));
339 		else
340 			out_list = lappend(out_list, subexpr);
341 	}
342 	return out_list;
343 }
344 
345 /*
346  * pull_ors
347  *	  Recursively flatten nested OR clauses into a single or-clause list.
348  *
349  * Input is the arglist of an OR clause.
350  * Returns the rebuilt arglist (note original list structure is not touched).
351  */
352 static List *
pull_ors(List * orlist)353 pull_ors(List *orlist)
354 {
355 	List	   *out_list = NIL;
356 	ListCell   *arg;
357 
358 	foreach(arg, orlist)
359 	{
360 		Node	   *subexpr = (Node *) lfirst(arg);
361 
362 		/*
363 		 * Note: we can destructively concat the subexpression's arglist
364 		 * because we know the recursive invocation of pull_ors will have
365 		 * built a new arglist not shared with any other expr. Otherwise we'd
366 		 * need a list_copy here.
367 		 */
368 		if (or_clause(subexpr))
369 			out_list = list_concat(out_list,
370 								   pull_ors(((BoolExpr *) subexpr)->args));
371 		else
372 			out_list = lappend(out_list, subexpr);
373 	}
374 	return out_list;
375 }
376 
377 
378 /*--------------------
379  * The following code attempts to apply the inverse OR distributive law:
380  *		((A AND B) OR (A AND C))  =>  (A AND (B OR C))
381  * That is, locate OR clauses in which every subclause contains an
382  * identical term, and pull out the duplicated terms.
383  *
384  * This may seem like a fairly useless activity, but it turns out to be
385  * applicable to many machine-generated queries, and there are also queries
386  * in some of the TPC benchmarks that need it.  This was in fact almost the
387  * sole useful side-effect of the old prepqual code that tried to force
388  * the query into canonical AND-of-ORs form: the canonical equivalent of
389  *		((A AND B) OR (A AND C))
390  * is
391  *		((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
392  * which the code was able to simplify to
393  *		(A AND (A OR C) AND (B OR A) AND (B OR C))
394  * thus successfully extracting the common condition A --- but at the cost
395  * of cluttering the qual with many redundant clauses.
396  *--------------------
397  */
398 
399 /*
400  * find_duplicate_ors
401  *	  Given a qualification tree with the NOTs pushed down, search for
402  *	  OR clauses to which the inverse OR distributive law might apply.
403  *	  Only the top-level AND/OR structure is searched.
404  *
405  * While at it, we remove any NULL constants within the top-level AND/OR
406  * structure, eg in a WHERE clause, "x OR NULL::boolean" is reduced to "x".
407  * In general that would change the result, so eval_const_expressions can't
408  * do it; but at top level of WHERE, we don't need to distinguish between
409  * FALSE and NULL results, so it's valid to treat NULL::boolean the same
410  * as FALSE and then simplify AND/OR accordingly.  Conversely, in a top-level
411  * CHECK constraint, we may treat a NULL the same as TRUE.
412  *
413  * Returns the modified qualification.  AND/OR flatness is preserved.
414  */
415 static Expr *
find_duplicate_ors(Expr * qual,bool is_check)416 find_duplicate_ors(Expr *qual, bool is_check)
417 {
418 	if (or_clause((Node *) qual))
419 	{
420 		List	   *orlist = NIL;
421 		ListCell   *temp;
422 
423 		/* Recurse */
424 		foreach(temp, ((BoolExpr *) qual)->args)
425 		{
426 			Expr	   *arg = (Expr *) lfirst(temp);
427 
428 			arg = find_duplicate_ors(arg, is_check);
429 
430 			/* Get rid of any constant inputs */
431 			if (arg && IsA(arg, Const))
432 			{
433 				Const	   *carg = (Const *) arg;
434 
435 				if (is_check)
436 				{
437 					/* Within OR in CHECK, drop constant FALSE */
438 					if (!carg->constisnull && !DatumGetBool(carg->constvalue))
439 						continue;
440 					/* Constant TRUE or NULL, so OR reduces to TRUE */
441 					return (Expr *) makeBoolConst(true, false);
442 				}
443 				else
444 				{
445 					/* Within OR in WHERE, drop constant FALSE or NULL */
446 					if (carg->constisnull || !DatumGetBool(carg->constvalue))
447 						continue;
448 					/* Constant TRUE, so OR reduces to TRUE */
449 					return arg;
450 				}
451 			}
452 
453 			orlist = lappend(orlist, arg);
454 		}
455 
456 		/* Flatten any ORs pulled up to just below here */
457 		orlist = pull_ors(orlist);
458 
459 		/* Now we can look for duplicate ORs */
460 		return process_duplicate_ors(orlist);
461 	}
462 	else if (and_clause((Node *) qual))
463 	{
464 		List	   *andlist = NIL;
465 		ListCell   *temp;
466 
467 		/* Recurse */
468 		foreach(temp, ((BoolExpr *) qual)->args)
469 		{
470 			Expr	   *arg = (Expr *) lfirst(temp);
471 
472 			arg = find_duplicate_ors(arg, is_check);
473 
474 			/* Get rid of any constant inputs */
475 			if (arg && IsA(arg, Const))
476 			{
477 				Const	   *carg = (Const *) arg;
478 
479 				if (is_check)
480 				{
481 					/* Within AND in CHECK, drop constant TRUE or NULL */
482 					if (carg->constisnull || DatumGetBool(carg->constvalue))
483 						continue;
484 					/* Constant FALSE, so AND reduces to FALSE */
485 					return arg;
486 				}
487 				else
488 				{
489 					/* Within AND in WHERE, drop constant TRUE */
490 					if (!carg->constisnull && DatumGetBool(carg->constvalue))
491 						continue;
492 					/* Constant FALSE or NULL, so AND reduces to FALSE */
493 					return (Expr *) makeBoolConst(false, false);
494 				}
495 			}
496 
497 			andlist = lappend(andlist, arg);
498 		}
499 
500 		/* Flatten any ANDs introduced just below here */
501 		andlist = pull_ands(andlist);
502 
503 		/* AND of no inputs reduces to TRUE */
504 		if (andlist == NIL)
505 			return (Expr *) makeBoolConst(true, false);
506 
507 		/* Single-expression AND just reduces to that expression */
508 		if (list_length(andlist) == 1)
509 			return (Expr *) linitial(andlist);
510 
511 		/* Else we still need an AND node */
512 		return make_andclause(andlist);
513 	}
514 	else
515 		return qual;
516 }
517 
518 /*
519  * process_duplicate_ors
520  *	  Given a list of exprs which are ORed together, try to apply
521  *	  the inverse OR distributive law.
522  *
523  * Returns the resulting expression (could be an AND clause, an OR
524  * clause, or maybe even a single subexpression).
525  */
526 static Expr *
process_duplicate_ors(List * orlist)527 process_duplicate_ors(List *orlist)
528 {
529 	List	   *reference = NIL;
530 	int			num_subclauses = 0;
531 	List	   *winners;
532 	List	   *neworlist;
533 	ListCell   *temp;
534 
535 	/* OR of no inputs reduces to FALSE */
536 	if (orlist == NIL)
537 		return (Expr *) makeBoolConst(false, false);
538 
539 	/* Single-expression OR just reduces to that expression */
540 	if (list_length(orlist) == 1)
541 		return (Expr *) linitial(orlist);
542 
543 	/*
544 	 * Choose the shortest AND clause as the reference list --- obviously, any
545 	 * subclause not in this clause isn't in all the clauses. If we find a
546 	 * clause that's not an AND, we can treat it as a one-element AND clause,
547 	 * which necessarily wins as shortest.
548 	 */
549 	foreach(temp, orlist)
550 	{
551 		Expr	   *clause = (Expr *) lfirst(temp);
552 
553 		if (and_clause((Node *) clause))
554 		{
555 			List	   *subclauses = ((BoolExpr *) clause)->args;
556 			int			nclauses = list_length(subclauses);
557 
558 			if (reference == NIL || nclauses < num_subclauses)
559 			{
560 				reference = subclauses;
561 				num_subclauses = nclauses;
562 			}
563 		}
564 		else
565 		{
566 			reference = list_make1(clause);
567 			break;
568 		}
569 	}
570 
571 	/*
572 	 * Just in case, eliminate any duplicates in the reference list.
573 	 */
574 	reference = list_union(NIL, reference);
575 
576 	/*
577 	 * Check each element of the reference list to see if it's in all the OR
578 	 * clauses.  Build a new list of winning clauses.
579 	 */
580 	winners = NIL;
581 	foreach(temp, reference)
582 	{
583 		Expr	   *refclause = (Expr *) lfirst(temp);
584 		bool		win = true;
585 		ListCell   *temp2;
586 
587 		foreach(temp2, orlist)
588 		{
589 			Expr	   *clause = (Expr *) lfirst(temp2);
590 
591 			if (and_clause((Node *) clause))
592 			{
593 				if (!list_member(((BoolExpr *) clause)->args, refclause))
594 				{
595 					win = false;
596 					break;
597 				}
598 			}
599 			else
600 			{
601 				if (!equal(refclause, clause))
602 				{
603 					win = false;
604 					break;
605 				}
606 			}
607 		}
608 
609 		if (win)
610 			winners = lappend(winners, refclause);
611 	}
612 
613 	/*
614 	 * If no winners, we can't transform the OR
615 	 */
616 	if (winners == NIL)
617 		return make_orclause(orlist);
618 
619 	/*
620 	 * Generate new OR list consisting of the remaining sub-clauses.
621 	 *
622 	 * If any clause degenerates to empty, then we have a situation like (A
623 	 * AND B) OR (A), which can be reduced to just A --- that is, the
624 	 * additional conditions in other arms of the OR are irrelevant.
625 	 *
626 	 * Note that because we use list_difference, any multiple occurrences of a
627 	 * winning clause in an AND sub-clause will be removed automatically.
628 	 */
629 	neworlist = NIL;
630 	foreach(temp, orlist)
631 	{
632 		Expr	   *clause = (Expr *) lfirst(temp);
633 
634 		if (and_clause((Node *) clause))
635 		{
636 			List	   *subclauses = ((BoolExpr *) clause)->args;
637 
638 			subclauses = list_difference(subclauses, winners);
639 			if (subclauses != NIL)
640 			{
641 				if (list_length(subclauses) == 1)
642 					neworlist = lappend(neworlist, linitial(subclauses));
643 				else
644 					neworlist = lappend(neworlist, make_andclause(subclauses));
645 			}
646 			else
647 			{
648 				neworlist = NIL;	/* degenerate case, see above */
649 				break;
650 			}
651 		}
652 		else
653 		{
654 			if (!list_member(winners, clause))
655 				neworlist = lappend(neworlist, clause);
656 			else
657 			{
658 				neworlist = NIL;	/* degenerate case, see above */
659 				break;
660 			}
661 		}
662 	}
663 
664 	/*
665 	 * Append reduced OR to the winners list, if it's not degenerate, handling
666 	 * the special case of one element correctly (can that really happen?).
667 	 * Also be careful to maintain AND/OR flatness in case we pulled up a
668 	 * sub-sub-OR-clause.
669 	 */
670 	if (neworlist != NIL)
671 	{
672 		if (list_length(neworlist) == 1)
673 			winners = lappend(winners, linitial(neworlist));
674 		else
675 			winners = lappend(winners, make_orclause(pull_ors(neworlist)));
676 	}
677 
678 	/*
679 	 * And return the constructed AND clause, again being wary of a single
680 	 * element and AND/OR flatness.
681 	 */
682 	if (list_length(winners) == 1)
683 		return (Expr *) linitial(winners);
684 	else
685 		return make_andclause(pull_ands(winners));
686 }
687