1 /*-------------------------------------------------------------------------
2 *
3 * prepqual.c
4 * Routines for preprocessing qualification expressions
5 *
6 *
7 * While the parser will produce flattened (N-argument) AND/OR trees from
8 * simple sequences of AND'ed or OR'ed clauses, there might be an AND clause
9 * directly underneath another AND, or OR underneath OR, if the input was
10 * oddly parenthesized. Also, rule expansion and subquery flattening could
11 * produce such parsetrees. The planner wants to flatten all such cases
12 * to ensure consistent optimization behavior.
13 *
14 * Formerly, this module was responsible for doing the initial flattening,
15 * but now we leave it to eval_const_expressions to do that since it has to
16 * make a complete pass over the expression tree anyway. Instead, we just
17 * have to ensure that our manipulations preserve AND/OR flatness.
18 * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
19 * tree after local transformations that might introduce nested AND/ORs.
20 *
21 *
22 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
23 * Portions Copyright (c) 1994, Regents of the University of California
24 *
25 *
26 * IDENTIFICATION
27 * src/backend/optimizer/prep/prepqual.c
28 *
29 *-------------------------------------------------------------------------
30 */
31
32 #include "postgres.h"
33
34 #include "nodes/makefuncs.h"
35 #include "optimizer/clauses.h"
36 #include "optimizer/prep.h"
37 #include "utils/lsyscache.h"
38
39
40 static List *pull_ands(List *andlist);
41 static List *pull_ors(List *orlist);
42 static Expr *find_duplicate_ors(Expr *qual, bool is_check);
43 static Expr *process_duplicate_ors(List *orlist);
44
45
46 /*
47 * negate_clause
48 * Negate a Boolean expression.
49 *
50 * Input is a clause to be negated (e.g., the argument of a NOT clause).
51 * Returns a new clause equivalent to the negation of the given clause.
52 *
53 * Although this can be invoked on its own, it's mainly intended as a helper
54 * for eval_const_expressions(), and that context drives several design
55 * decisions. In particular, if the input is already AND/OR flat, we must
56 * preserve that property. We also don't bother to recurse in situations
57 * where we can assume that lower-level executions of eval_const_expressions
58 * would already have simplified sub-clauses of the input.
59 *
60 * The difference between this and a simple make_notclause() is that this
61 * tries to get rid of the NOT node by logical simplification. It's clearly
62 * always a win if the NOT node can be eliminated altogether. However, our
63 * use of DeMorgan's laws could result in having more NOT nodes rather than
64 * fewer. We do that unconditionally anyway, because in WHERE clauses it's
65 * important to expose as much top-level AND/OR structure as possible.
66 * Also, eliminating an intermediate NOT may allow us to flatten two levels
67 * of AND or OR together that we couldn't have otherwise. Finally, one of
68 * the motivations for doing this is to ensure that logically equivalent
69 * expressions will be seen as physically equal(), so we should always apply
70 * the same transformations.
71 */
72 Node *
negate_clause(Node * node)73 negate_clause(Node *node)
74 {
75 if (node == NULL) /* should not happen */
76 elog(ERROR, "can't negate an empty subexpression");
77 switch (nodeTag(node))
78 {
79 case T_Const:
80 {
81 Const *c = (Const *) node;
82
83 /* NOT NULL is still NULL */
84 if (c->constisnull)
85 return makeBoolConst(false, true);
86 /* otherwise pretty easy */
87 return makeBoolConst(!DatumGetBool(c->constvalue), false);
88 }
89 break;
90 case T_OpExpr:
91 {
92 /*
93 * Negate operator if possible: (NOT (< A B)) => (>= A B)
94 */
95 OpExpr *opexpr = (OpExpr *) node;
96 Oid negator = get_negator(opexpr->opno);
97
98 if (negator)
99 {
100 OpExpr *newopexpr = makeNode(OpExpr);
101
102 newopexpr->opno = negator;
103 newopexpr->opfuncid = InvalidOid;
104 newopexpr->opresulttype = opexpr->opresulttype;
105 newopexpr->opretset = opexpr->opretset;
106 newopexpr->opcollid = opexpr->opcollid;
107 newopexpr->inputcollid = opexpr->inputcollid;
108 newopexpr->args = opexpr->args;
109 newopexpr->location = opexpr->location;
110 return (Node *) newopexpr;
111 }
112 }
113 break;
114 case T_ScalarArrayOpExpr:
115 {
116 /*
117 * Negate a ScalarArrayOpExpr if its operator has a negator;
118 * for example x = ANY (list) becomes x <> ALL (list)
119 */
120 ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node;
121 Oid negator = get_negator(saopexpr->opno);
122
123 if (negator)
124 {
125 ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
126
127 newopexpr->opno = negator;
128 newopexpr->opfuncid = InvalidOid;
129 newopexpr->useOr = !saopexpr->useOr;
130 newopexpr->inputcollid = saopexpr->inputcollid;
131 newopexpr->args = saopexpr->args;
132 newopexpr->location = saopexpr->location;
133 return (Node *) newopexpr;
134 }
135 }
136 break;
137 case T_BoolExpr:
138 {
139 BoolExpr *expr = (BoolExpr *) node;
140
141 switch (expr->boolop)
142 {
143 /*--------------------
144 * Apply DeMorgan's Laws:
145 * (NOT (AND A B)) => (OR (NOT A) (NOT B))
146 * (NOT (OR A B)) => (AND (NOT A) (NOT B))
147 * i.e., swap AND for OR and negate each subclause.
148 *
149 * If the input is already AND/OR flat and has no NOT
150 * directly above AND or OR, this transformation preserves
151 * those properties. For example, if no direct child of
152 * the given AND clause is an AND or a NOT-above-OR, then
153 * the recursive calls of negate_clause() can't return any
154 * OR clauses. So we needn't call pull_ors() before
155 * building a new OR clause. Similarly for the OR case.
156 *--------------------
157 */
158 case AND_EXPR:
159 {
160 List *nargs = NIL;
161 ListCell *lc;
162
163 foreach(lc, expr->args)
164 {
165 nargs = lappend(nargs,
166 negate_clause(lfirst(lc)));
167 }
168 return (Node *) make_orclause(nargs);
169 }
170 break;
171 case OR_EXPR:
172 {
173 List *nargs = NIL;
174 ListCell *lc;
175
176 foreach(lc, expr->args)
177 {
178 nargs = lappend(nargs,
179 negate_clause(lfirst(lc)));
180 }
181 return (Node *) make_andclause(nargs);
182 }
183 break;
184 case NOT_EXPR:
185
186 /*
187 * NOT underneath NOT: they cancel. We assume the
188 * input is already simplified, so no need to recurse.
189 */
190 return (Node *) linitial(expr->args);
191 default:
192 elog(ERROR, "unrecognized boolop: %d",
193 (int) expr->boolop);
194 break;
195 }
196 }
197 break;
198 case T_NullTest:
199 {
200 NullTest *expr = (NullTest *) node;
201
202 /*
203 * In the rowtype case, the two flavors of NullTest are *not*
204 * logical inverses, so we can't simplify. But it does work
205 * for scalar datatypes.
206 */
207 if (!expr->argisrow)
208 {
209 NullTest *newexpr = makeNode(NullTest);
210
211 newexpr->arg = expr->arg;
212 newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
213 IS_NOT_NULL : IS_NULL);
214 newexpr->argisrow = expr->argisrow;
215 newexpr->location = expr->location;
216 return (Node *) newexpr;
217 }
218 }
219 break;
220 case T_BooleanTest:
221 {
222 BooleanTest *expr = (BooleanTest *) node;
223 BooleanTest *newexpr = makeNode(BooleanTest);
224
225 newexpr->arg = expr->arg;
226 switch (expr->booltesttype)
227 {
228 case IS_TRUE:
229 newexpr->booltesttype = IS_NOT_TRUE;
230 break;
231 case IS_NOT_TRUE:
232 newexpr->booltesttype = IS_TRUE;
233 break;
234 case IS_FALSE:
235 newexpr->booltesttype = IS_NOT_FALSE;
236 break;
237 case IS_NOT_FALSE:
238 newexpr->booltesttype = IS_FALSE;
239 break;
240 case IS_UNKNOWN:
241 newexpr->booltesttype = IS_NOT_UNKNOWN;
242 break;
243 case IS_NOT_UNKNOWN:
244 newexpr->booltesttype = IS_UNKNOWN;
245 break;
246 default:
247 elog(ERROR, "unrecognized booltesttype: %d",
248 (int) expr->booltesttype);
249 break;
250 }
251 newexpr->location = expr->location;
252 return (Node *) newexpr;
253 }
254 break;
255 default:
256 /* else fall through */
257 break;
258 }
259
260 /*
261 * Otherwise we don't know how to simplify this, so just tack on an
262 * explicit NOT node.
263 */
264 return (Node *) make_notclause((Expr *) node);
265 }
266
267
268 /*
269 * canonicalize_qual
270 * Convert a qualification expression to the most useful form.
271 *
272 * This is primarily intended to be used on top-level WHERE (or JOIN/ON)
273 * clauses. It can also be used on top-level CHECK constraints, for which
274 * pass is_check = true. DO NOT call it on any expression that is not known
275 * to be one or the other, as it might apply inappropriate simplifications.
276 *
277 * The name of this routine is a holdover from a time when it would try to
278 * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
279 * Eventually, we recognized that that had more theoretical purity than
280 * actual usefulness, and so now the transformation doesn't involve any
281 * notion of reaching a canonical form.
282 *
283 * NOTE: we assume the input has already been through eval_const_expressions
284 * and therefore possesses AND/OR flatness. Formerly this function included
285 * its own flattening logic, but that requires a useless extra pass over the
286 * tree.
287 *
288 * Returns the modified qualification.
289 */
290 Expr *
canonicalize_qual(Expr * qual,bool is_check)291 canonicalize_qual(Expr *qual, bool is_check)
292 {
293 Expr *newqual;
294
295 /* Quick exit for empty qual */
296 if (qual == NULL)
297 return NULL;
298
299 /* This should not be invoked on quals in implicit-AND format */
300 Assert(!IsA(qual, List));
301
302 /*
303 * Pull up redundant subclauses in OR-of-AND trees. We do this only
304 * within the top-level AND/OR structure; there's no point in looking
305 * deeper. Also remove any NULL constants in the top-level structure.
306 */
307 newqual = find_duplicate_ors(qual, is_check);
308
309 return newqual;
310 }
311
312
313 /*
314 * pull_ands
315 * Recursively flatten nested AND clauses into a single and-clause list.
316 *
317 * Input is the arglist of an AND clause.
318 * Returns the rebuilt arglist (note original list structure is not touched).
319 */
320 static List *
pull_ands(List * andlist)321 pull_ands(List *andlist)
322 {
323 List *out_list = NIL;
324 ListCell *arg;
325
326 foreach(arg, andlist)
327 {
328 Node *subexpr = (Node *) lfirst(arg);
329
330 /*
331 * Note: we can destructively concat the subexpression's arglist
332 * because we know the recursive invocation of pull_ands will have
333 * built a new arglist not shared with any other expr. Otherwise we'd
334 * need a list_copy here.
335 */
336 if (and_clause(subexpr))
337 out_list = list_concat(out_list,
338 pull_ands(((BoolExpr *) subexpr)->args));
339 else
340 out_list = lappend(out_list, subexpr);
341 }
342 return out_list;
343 }
344
345 /*
346 * pull_ors
347 * Recursively flatten nested OR clauses into a single or-clause list.
348 *
349 * Input is the arglist of an OR clause.
350 * Returns the rebuilt arglist (note original list structure is not touched).
351 */
352 static List *
pull_ors(List * orlist)353 pull_ors(List *orlist)
354 {
355 List *out_list = NIL;
356 ListCell *arg;
357
358 foreach(arg, orlist)
359 {
360 Node *subexpr = (Node *) lfirst(arg);
361
362 /*
363 * Note: we can destructively concat the subexpression's arglist
364 * because we know the recursive invocation of pull_ors will have
365 * built a new arglist not shared with any other expr. Otherwise we'd
366 * need a list_copy here.
367 */
368 if (or_clause(subexpr))
369 out_list = list_concat(out_list,
370 pull_ors(((BoolExpr *) subexpr)->args));
371 else
372 out_list = lappend(out_list, subexpr);
373 }
374 return out_list;
375 }
376
377
378 /*--------------------
379 * The following code attempts to apply the inverse OR distributive law:
380 * ((A AND B) OR (A AND C)) => (A AND (B OR C))
381 * That is, locate OR clauses in which every subclause contains an
382 * identical term, and pull out the duplicated terms.
383 *
384 * This may seem like a fairly useless activity, but it turns out to be
385 * applicable to many machine-generated queries, and there are also queries
386 * in some of the TPC benchmarks that need it. This was in fact almost the
387 * sole useful side-effect of the old prepqual code that tried to force
388 * the query into canonical AND-of-ORs form: the canonical equivalent of
389 * ((A AND B) OR (A AND C))
390 * is
391 * ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
392 * which the code was able to simplify to
393 * (A AND (A OR C) AND (B OR A) AND (B OR C))
394 * thus successfully extracting the common condition A --- but at the cost
395 * of cluttering the qual with many redundant clauses.
396 *--------------------
397 */
398
399 /*
400 * find_duplicate_ors
401 * Given a qualification tree with the NOTs pushed down, search for
402 * OR clauses to which the inverse OR distributive law might apply.
403 * Only the top-level AND/OR structure is searched.
404 *
405 * While at it, we remove any NULL constants within the top-level AND/OR
406 * structure, eg in a WHERE clause, "x OR NULL::boolean" is reduced to "x".
407 * In general that would change the result, so eval_const_expressions can't
408 * do it; but at top level of WHERE, we don't need to distinguish between
409 * FALSE and NULL results, so it's valid to treat NULL::boolean the same
410 * as FALSE and then simplify AND/OR accordingly. Conversely, in a top-level
411 * CHECK constraint, we may treat a NULL the same as TRUE.
412 *
413 * Returns the modified qualification. AND/OR flatness is preserved.
414 */
415 static Expr *
find_duplicate_ors(Expr * qual,bool is_check)416 find_duplicate_ors(Expr *qual, bool is_check)
417 {
418 if (or_clause((Node *) qual))
419 {
420 List *orlist = NIL;
421 ListCell *temp;
422
423 /* Recurse */
424 foreach(temp, ((BoolExpr *) qual)->args)
425 {
426 Expr *arg = (Expr *) lfirst(temp);
427
428 arg = find_duplicate_ors(arg, is_check);
429
430 /* Get rid of any constant inputs */
431 if (arg && IsA(arg, Const))
432 {
433 Const *carg = (Const *) arg;
434
435 if (is_check)
436 {
437 /* Within OR in CHECK, drop constant FALSE */
438 if (!carg->constisnull && !DatumGetBool(carg->constvalue))
439 continue;
440 /* Constant TRUE or NULL, so OR reduces to TRUE */
441 return (Expr *) makeBoolConst(true, false);
442 }
443 else
444 {
445 /* Within OR in WHERE, drop constant FALSE or NULL */
446 if (carg->constisnull || !DatumGetBool(carg->constvalue))
447 continue;
448 /* Constant TRUE, so OR reduces to TRUE */
449 return arg;
450 }
451 }
452
453 orlist = lappend(orlist, arg);
454 }
455
456 /* Flatten any ORs pulled up to just below here */
457 orlist = pull_ors(orlist);
458
459 /* Now we can look for duplicate ORs */
460 return process_duplicate_ors(orlist);
461 }
462 else if (and_clause((Node *) qual))
463 {
464 List *andlist = NIL;
465 ListCell *temp;
466
467 /* Recurse */
468 foreach(temp, ((BoolExpr *) qual)->args)
469 {
470 Expr *arg = (Expr *) lfirst(temp);
471
472 arg = find_duplicate_ors(arg, is_check);
473
474 /* Get rid of any constant inputs */
475 if (arg && IsA(arg, Const))
476 {
477 Const *carg = (Const *) arg;
478
479 if (is_check)
480 {
481 /* Within AND in CHECK, drop constant TRUE or NULL */
482 if (carg->constisnull || DatumGetBool(carg->constvalue))
483 continue;
484 /* Constant FALSE, so AND reduces to FALSE */
485 return arg;
486 }
487 else
488 {
489 /* Within AND in WHERE, drop constant TRUE */
490 if (!carg->constisnull && DatumGetBool(carg->constvalue))
491 continue;
492 /* Constant FALSE or NULL, so AND reduces to FALSE */
493 return (Expr *) makeBoolConst(false, false);
494 }
495 }
496
497 andlist = lappend(andlist, arg);
498 }
499
500 /* Flatten any ANDs introduced just below here */
501 andlist = pull_ands(andlist);
502
503 /* AND of no inputs reduces to TRUE */
504 if (andlist == NIL)
505 return (Expr *) makeBoolConst(true, false);
506
507 /* Single-expression AND just reduces to that expression */
508 if (list_length(andlist) == 1)
509 return (Expr *) linitial(andlist);
510
511 /* Else we still need an AND node */
512 return make_andclause(andlist);
513 }
514 else
515 return qual;
516 }
517
518 /*
519 * process_duplicate_ors
520 * Given a list of exprs which are ORed together, try to apply
521 * the inverse OR distributive law.
522 *
523 * Returns the resulting expression (could be an AND clause, an OR
524 * clause, or maybe even a single subexpression).
525 */
526 static Expr *
process_duplicate_ors(List * orlist)527 process_duplicate_ors(List *orlist)
528 {
529 List *reference = NIL;
530 int num_subclauses = 0;
531 List *winners;
532 List *neworlist;
533 ListCell *temp;
534
535 /* OR of no inputs reduces to FALSE */
536 if (orlist == NIL)
537 return (Expr *) makeBoolConst(false, false);
538
539 /* Single-expression OR just reduces to that expression */
540 if (list_length(orlist) == 1)
541 return (Expr *) linitial(orlist);
542
543 /*
544 * Choose the shortest AND clause as the reference list --- obviously, any
545 * subclause not in this clause isn't in all the clauses. If we find a
546 * clause that's not an AND, we can treat it as a one-element AND clause,
547 * which necessarily wins as shortest.
548 */
549 foreach(temp, orlist)
550 {
551 Expr *clause = (Expr *) lfirst(temp);
552
553 if (and_clause((Node *) clause))
554 {
555 List *subclauses = ((BoolExpr *) clause)->args;
556 int nclauses = list_length(subclauses);
557
558 if (reference == NIL || nclauses < num_subclauses)
559 {
560 reference = subclauses;
561 num_subclauses = nclauses;
562 }
563 }
564 else
565 {
566 reference = list_make1(clause);
567 break;
568 }
569 }
570
571 /*
572 * Just in case, eliminate any duplicates in the reference list.
573 */
574 reference = list_union(NIL, reference);
575
576 /*
577 * Check each element of the reference list to see if it's in all the OR
578 * clauses. Build a new list of winning clauses.
579 */
580 winners = NIL;
581 foreach(temp, reference)
582 {
583 Expr *refclause = (Expr *) lfirst(temp);
584 bool win = true;
585 ListCell *temp2;
586
587 foreach(temp2, orlist)
588 {
589 Expr *clause = (Expr *) lfirst(temp2);
590
591 if (and_clause((Node *) clause))
592 {
593 if (!list_member(((BoolExpr *) clause)->args, refclause))
594 {
595 win = false;
596 break;
597 }
598 }
599 else
600 {
601 if (!equal(refclause, clause))
602 {
603 win = false;
604 break;
605 }
606 }
607 }
608
609 if (win)
610 winners = lappend(winners, refclause);
611 }
612
613 /*
614 * If no winners, we can't transform the OR
615 */
616 if (winners == NIL)
617 return make_orclause(orlist);
618
619 /*
620 * Generate new OR list consisting of the remaining sub-clauses.
621 *
622 * If any clause degenerates to empty, then we have a situation like (A
623 * AND B) OR (A), which can be reduced to just A --- that is, the
624 * additional conditions in other arms of the OR are irrelevant.
625 *
626 * Note that because we use list_difference, any multiple occurrences of a
627 * winning clause in an AND sub-clause will be removed automatically.
628 */
629 neworlist = NIL;
630 foreach(temp, orlist)
631 {
632 Expr *clause = (Expr *) lfirst(temp);
633
634 if (and_clause((Node *) clause))
635 {
636 List *subclauses = ((BoolExpr *) clause)->args;
637
638 subclauses = list_difference(subclauses, winners);
639 if (subclauses != NIL)
640 {
641 if (list_length(subclauses) == 1)
642 neworlist = lappend(neworlist, linitial(subclauses));
643 else
644 neworlist = lappend(neworlist, make_andclause(subclauses));
645 }
646 else
647 {
648 neworlist = NIL; /* degenerate case, see above */
649 break;
650 }
651 }
652 else
653 {
654 if (!list_member(winners, clause))
655 neworlist = lappend(neworlist, clause);
656 else
657 {
658 neworlist = NIL; /* degenerate case, see above */
659 break;
660 }
661 }
662 }
663
664 /*
665 * Append reduced OR to the winners list, if it's not degenerate, handling
666 * the special case of one element correctly (can that really happen?).
667 * Also be careful to maintain AND/OR flatness in case we pulled up a
668 * sub-sub-OR-clause.
669 */
670 if (neworlist != NIL)
671 {
672 if (list_length(neworlist) == 1)
673 winners = lappend(winners, linitial(neworlist));
674 else
675 winners = lappend(winners, make_orclause(pull_ors(neworlist)));
676 }
677
678 /*
679 * And return the constructed AND clause, again being wary of a single
680 * element and AND/OR flatness.
681 */
682 if (list_length(winners) == 1)
683 return (Expr *) linitial(winners);
684 else
685 return make_andclause(pull_ands(winners));
686 }
687