1 /*-------------------------------------------------------------------------
2 *
3 * pathkeys.c
4 * Utilities for matching and building path keys
5 *
6 * See src/backend/optimizer/README for a great deal of information about
7 * the nature and use of path keys.
8 *
9 *
10 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
12 *
13 * IDENTIFICATION
14 * src/backend/optimizer/path/pathkeys.c
15 *
16 *-------------------------------------------------------------------------
17 */
18 #include "postgres.h"
19
20 #include "access/stratnum.h"
21 #include "catalog/pg_opfamily.h"
22 #include "nodes/makefuncs.h"
23 #include "nodes/nodeFuncs.h"
24 #include "nodes/plannodes.h"
25 #include "optimizer/optimizer.h"
26 #include "optimizer/pathnode.h"
27 #include "optimizer/paths.h"
28 #include "partitioning/partbounds.h"
29 #include "utils/lsyscache.h"
30
31
32 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
33 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
34 RelOptInfo *partrel,
35 int partkeycol);
36 static Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle);
37 static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
38
39
40 /****************************************************************************
41 * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING
42 ****************************************************************************/
43
44 /*
45 * make_canonical_pathkey
46 * Given the parameters for a PathKey, find any pre-existing matching
47 * pathkey in the query's list of "canonical" pathkeys. Make a new
48 * entry if there's not one already.
49 *
50 * Note that this function must not be used until after we have completed
51 * merging EquivalenceClasses.
52 */
53 PathKey *
make_canonical_pathkey(PlannerInfo * root,EquivalenceClass * eclass,Oid opfamily,int strategy,bool nulls_first)54 make_canonical_pathkey(PlannerInfo *root,
55 EquivalenceClass *eclass, Oid opfamily,
56 int strategy, bool nulls_first)
57 {
58 PathKey *pk;
59 ListCell *lc;
60 MemoryContext oldcontext;
61
62 /* Can't make canonical pathkeys if the set of ECs might still change */
63 if (!root->ec_merging_done)
64 elog(ERROR, "too soon to build canonical pathkeys");
65
66 /* The passed eclass might be non-canonical, so chase up to the top */
67 while (eclass->ec_merged)
68 eclass = eclass->ec_merged;
69
70 foreach(lc, root->canon_pathkeys)
71 {
72 pk = (PathKey *) lfirst(lc);
73 if (eclass == pk->pk_eclass &&
74 opfamily == pk->pk_opfamily &&
75 strategy == pk->pk_strategy &&
76 nulls_first == pk->pk_nulls_first)
77 return pk;
78 }
79
80 /*
81 * Be sure canonical pathkeys are allocated in the main planning context.
82 * Not an issue in normal planning, but it is for GEQO.
83 */
84 oldcontext = MemoryContextSwitchTo(root->planner_cxt);
85
86 pk = makeNode(PathKey);
87 pk->pk_eclass = eclass;
88 pk->pk_opfamily = opfamily;
89 pk->pk_strategy = strategy;
90 pk->pk_nulls_first = nulls_first;
91
92 root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
93
94 MemoryContextSwitchTo(oldcontext);
95
96 return pk;
97 }
98
99 /*
100 * pathkey_is_redundant
101 * Is a pathkey redundant with one already in the given list?
102 *
103 * We detect two cases:
104 *
105 * 1. If the new pathkey's equivalence class contains a constant, and isn't
106 * below an outer join, then we can disregard it as a sort key. An example:
107 * SELECT ... WHERE x = 42 ORDER BY x, y;
108 * We may as well just sort by y. Note that because of opfamily matching,
109 * this is semantically correct: we know that the equality constraint is one
110 * that actually binds the variable to a single value in the terms of any
111 * ordering operator that might go with the eclass. This rule not only lets
112 * us simplify (or even skip) explicit sorts, but also allows matching index
113 * sort orders to a query when there are don't-care index columns.
114 *
115 * 2. If the new pathkey's equivalence class is the same as that of any
116 * existing member of the pathkey list, then it is redundant. Some examples:
117 * SELECT ... ORDER BY x, x;
118 * SELECT ... ORDER BY x, x DESC;
119 * SELECT ... WHERE x = y ORDER BY x, y;
120 * In all these cases the second sort key cannot distinguish values that are
121 * considered equal by the first, and so there's no point in using it.
122 * Note in particular that we need not compare opfamily (all the opfamilies
123 * of the EC have the same notion of equality) nor sort direction.
124 *
125 * Both the given pathkey and the list members must be canonical for this
126 * to work properly, but that's okay since we no longer ever construct any
127 * non-canonical pathkeys. (Note: the notion of a pathkey *list* being
128 * canonical includes the additional requirement of no redundant entries,
129 * which is exactly what we are checking for here.)
130 *
131 * Because the equivclass.c machinery forms only one copy of any EC per query,
132 * pointer comparison is enough to decide whether canonical ECs are the same.
133 */
134 static bool
pathkey_is_redundant(PathKey * new_pathkey,List * pathkeys)135 pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
136 {
137 EquivalenceClass *new_ec = new_pathkey->pk_eclass;
138 ListCell *lc;
139
140 /* Check for EC containing a constant --- unconditionally redundant */
141 if (EC_MUST_BE_REDUNDANT(new_ec))
142 return true;
143
144 /* If same EC already used in list, then redundant */
145 foreach(lc, pathkeys)
146 {
147 PathKey *old_pathkey = (PathKey *) lfirst(lc);
148
149 if (new_ec == old_pathkey->pk_eclass)
150 return true;
151 }
152
153 return false;
154 }
155
156 /*
157 * make_pathkey_from_sortinfo
158 * Given an expression and sort-order information, create a PathKey.
159 * The result is always a "canonical" PathKey, but it might be redundant.
160 *
161 * expr is the expression, and nullable_relids is the set of base relids
162 * that are potentially nullable below it.
163 *
164 * If the PathKey is being generated from a SortGroupClause, sortref should be
165 * the SortGroupClause's SortGroupRef; otherwise zero.
166 *
167 * If rel is not NULL, it identifies a specific relation we're considering
168 * a path for, and indicates that child EC members for that relation can be
169 * considered. Otherwise child members are ignored. (See the comments for
170 * get_eclass_for_sort_expr.)
171 *
172 * create_it is true if we should create any missing EquivalenceClass
173 * needed to represent the sort key. If it's false, we return NULL if the
174 * sort key isn't already present in any EquivalenceClass.
175 */
176 static PathKey *
make_pathkey_from_sortinfo(PlannerInfo * root,Expr * expr,Relids nullable_relids,Oid opfamily,Oid opcintype,Oid collation,bool reverse_sort,bool nulls_first,Index sortref,Relids rel,bool create_it)177 make_pathkey_from_sortinfo(PlannerInfo *root,
178 Expr *expr,
179 Relids nullable_relids,
180 Oid opfamily,
181 Oid opcintype,
182 Oid collation,
183 bool reverse_sort,
184 bool nulls_first,
185 Index sortref,
186 Relids rel,
187 bool create_it)
188 {
189 int16 strategy;
190 Oid equality_op;
191 List *opfamilies;
192 EquivalenceClass *eclass;
193
194 strategy = reverse_sort ? BTGreaterStrategyNumber : BTLessStrategyNumber;
195
196 /*
197 * EquivalenceClasses need to contain opfamily lists based on the family
198 * membership of mergejoinable equality operators, which could belong to
199 * more than one opfamily. So we have to look up the opfamily's equality
200 * operator and get its membership.
201 */
202 equality_op = get_opfamily_member(opfamily,
203 opcintype,
204 opcintype,
205 BTEqualStrategyNumber);
206 if (!OidIsValid(equality_op)) /* shouldn't happen */
207 elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
208 BTEqualStrategyNumber, opcintype, opcintype, opfamily);
209 opfamilies = get_mergejoin_opfamilies(equality_op);
210 if (!opfamilies) /* certainly should find some */
211 elog(ERROR, "could not find opfamilies for equality operator %u",
212 equality_op);
213
214 /* Now find or (optionally) create a matching EquivalenceClass */
215 eclass = get_eclass_for_sort_expr(root, expr, nullable_relids,
216 opfamilies, opcintype, collation,
217 sortref, rel, create_it);
218
219 /* Fail if no EC and !create_it */
220 if (!eclass)
221 return NULL;
222
223 /* And finally we can find or create a PathKey node */
224 return make_canonical_pathkey(root, eclass, opfamily,
225 strategy, nulls_first);
226 }
227
228 /*
229 * make_pathkey_from_sortop
230 * Like make_pathkey_from_sortinfo, but work from a sort operator.
231 *
232 * This should eventually go away, but we need to restructure SortGroupClause
233 * first.
234 */
235 static PathKey *
make_pathkey_from_sortop(PlannerInfo * root,Expr * expr,Relids nullable_relids,Oid ordering_op,bool nulls_first,Index sortref,bool create_it)236 make_pathkey_from_sortop(PlannerInfo *root,
237 Expr *expr,
238 Relids nullable_relids,
239 Oid ordering_op,
240 bool nulls_first,
241 Index sortref,
242 bool create_it)
243 {
244 Oid opfamily,
245 opcintype,
246 collation;
247 int16 strategy;
248
249 /* Find the operator in pg_amop --- failure shouldn't happen */
250 if (!get_ordering_op_properties(ordering_op,
251 &opfamily, &opcintype, &strategy))
252 elog(ERROR, "operator %u is not a valid ordering operator",
253 ordering_op);
254
255 /* Because SortGroupClause doesn't carry collation, consult the expr */
256 collation = exprCollation((Node *) expr);
257
258 return make_pathkey_from_sortinfo(root,
259 expr,
260 nullable_relids,
261 opfamily,
262 opcintype,
263 collation,
264 (strategy == BTGreaterStrategyNumber),
265 nulls_first,
266 sortref,
267 NULL,
268 create_it);
269 }
270
271
272 /****************************************************************************
273 * PATHKEY COMPARISONS
274 ****************************************************************************/
275
276 /*
277 * compare_pathkeys
278 * Compare two pathkeys to see if they are equivalent, and if not whether
279 * one is "better" than the other.
280 *
281 * We assume the pathkeys are canonical, and so they can be checked for
282 * equality by simple pointer comparison.
283 */
284 PathKeysComparison
compare_pathkeys(List * keys1,List * keys2)285 compare_pathkeys(List *keys1, List *keys2)
286 {
287 ListCell *key1,
288 *key2;
289
290 /*
291 * Fall out quickly if we are passed two identical lists. This mostly
292 * catches the case where both are NIL, but that's common enough to
293 * warrant the test.
294 */
295 if (keys1 == keys2)
296 return PATHKEYS_EQUAL;
297
298 forboth(key1, keys1, key2, keys2)
299 {
300 PathKey *pathkey1 = (PathKey *) lfirst(key1);
301 PathKey *pathkey2 = (PathKey *) lfirst(key2);
302
303 if (pathkey1 != pathkey2)
304 return PATHKEYS_DIFFERENT; /* no need to keep looking */
305 }
306
307 /*
308 * If we reached the end of only one list, the other is longer and
309 * therefore not a subset.
310 */
311 if (key1 != NULL)
312 return PATHKEYS_BETTER1; /* key1 is longer */
313 if (key2 != NULL)
314 return PATHKEYS_BETTER2; /* key2 is longer */
315 return PATHKEYS_EQUAL;
316 }
317
318 /*
319 * pathkeys_contained_in
320 * Common special case of compare_pathkeys: we just want to know
321 * if keys2 are at least as well sorted as keys1.
322 */
323 bool
pathkeys_contained_in(List * keys1,List * keys2)324 pathkeys_contained_in(List *keys1, List *keys2)
325 {
326 switch (compare_pathkeys(keys1, keys2))
327 {
328 case PATHKEYS_EQUAL:
329 case PATHKEYS_BETTER2:
330 return true;
331 default:
332 break;
333 }
334 return false;
335 }
336
337 /*
338 * pathkeys_count_contained_in
339 * Same as pathkeys_contained_in, but also sets length of longest
340 * common prefix of keys1 and keys2.
341 */
342 bool
pathkeys_count_contained_in(List * keys1,List * keys2,int * n_common)343 pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common)
344 {
345 int n = 0;
346 ListCell *key1,
347 *key2;
348
349 /*
350 * See if we can avoiding looping through both lists. This optimization
351 * gains us several percent in planning time in a worst-case test.
352 */
353 if (keys1 == keys2)
354 {
355 *n_common = list_length(keys1);
356 return true;
357 }
358 else if (keys1 == NIL)
359 {
360 *n_common = 0;
361 return true;
362 }
363 else if (keys2 == NIL)
364 {
365 *n_common = 0;
366 return false;
367 }
368
369 /*
370 * If both lists are non-empty, iterate through both to find out how many
371 * items are shared.
372 */
373 forboth(key1, keys1, key2, keys2)
374 {
375 PathKey *pathkey1 = (PathKey *) lfirst(key1);
376 PathKey *pathkey2 = (PathKey *) lfirst(key2);
377
378 if (pathkey1 != pathkey2)
379 {
380 *n_common = n;
381 return false;
382 }
383 n++;
384 }
385
386 /* If we ended with a null value, then we've processed the whole list. */
387 *n_common = n;
388 return (key1 == NULL);
389 }
390
391 /*
392 * get_cheapest_path_for_pathkeys
393 * Find the cheapest path (according to the specified criterion) that
394 * satisfies the given pathkeys and parameterization.
395 * Return NULL if no such path.
396 *
397 * 'paths' is a list of possible paths that all generate the same relation
398 * 'pathkeys' represents a required ordering (in canonical form!)
399 * 'required_outer' denotes allowable outer relations for parameterized paths
400 * 'cost_criterion' is STARTUP_COST or TOTAL_COST
401 * 'require_parallel_safe' causes us to consider only parallel-safe paths
402 */
403 Path *
get_cheapest_path_for_pathkeys(List * paths,List * pathkeys,Relids required_outer,CostSelector cost_criterion,bool require_parallel_safe)404 get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
405 Relids required_outer,
406 CostSelector cost_criterion,
407 bool require_parallel_safe)
408 {
409 Path *matched_path = NULL;
410 ListCell *l;
411
412 foreach(l, paths)
413 {
414 Path *path = (Path *) lfirst(l);
415
416 /*
417 * Since cost comparison is a lot cheaper than pathkey comparison, do
418 * that first. (XXX is that still true?)
419 */
420 if (matched_path != NULL &&
421 compare_path_costs(matched_path, path, cost_criterion) <= 0)
422 continue;
423
424 if (require_parallel_safe && !path->parallel_safe)
425 continue;
426
427 if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
428 bms_is_subset(PATH_REQ_OUTER(path), required_outer))
429 matched_path = path;
430 }
431 return matched_path;
432 }
433
434 /*
435 * get_cheapest_fractional_path_for_pathkeys
436 * Find the cheapest path (for retrieving a specified fraction of all
437 * the tuples) that satisfies the given pathkeys and parameterization.
438 * Return NULL if no such path.
439 *
440 * See compare_fractional_path_costs() for the interpretation of the fraction
441 * parameter.
442 *
443 * 'paths' is a list of possible paths that all generate the same relation
444 * 'pathkeys' represents a required ordering (in canonical form!)
445 * 'required_outer' denotes allowable outer relations for parameterized paths
446 * 'fraction' is the fraction of the total tuples expected to be retrieved
447 */
448 Path *
get_cheapest_fractional_path_for_pathkeys(List * paths,List * pathkeys,Relids required_outer,double fraction)449 get_cheapest_fractional_path_for_pathkeys(List *paths,
450 List *pathkeys,
451 Relids required_outer,
452 double fraction)
453 {
454 Path *matched_path = NULL;
455 ListCell *l;
456
457 foreach(l, paths)
458 {
459 Path *path = (Path *) lfirst(l);
460
461 /*
462 * Since cost comparison is a lot cheaper than pathkey comparison, do
463 * that first. (XXX is that still true?)
464 */
465 if (matched_path != NULL &&
466 compare_fractional_path_costs(matched_path, path, fraction) <= 0)
467 continue;
468
469 if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
470 bms_is_subset(PATH_REQ_OUTER(path), required_outer))
471 matched_path = path;
472 }
473 return matched_path;
474 }
475
476
477 /*
478 * get_cheapest_parallel_safe_total_inner
479 * Find the unparameterized parallel-safe path with the least total cost.
480 */
481 Path *
get_cheapest_parallel_safe_total_inner(List * paths)482 get_cheapest_parallel_safe_total_inner(List *paths)
483 {
484 ListCell *l;
485
486 foreach(l, paths)
487 {
488 Path *innerpath = (Path *) lfirst(l);
489
490 if (innerpath->parallel_safe &&
491 bms_is_empty(PATH_REQ_OUTER(innerpath)))
492 return innerpath;
493 }
494
495 return NULL;
496 }
497
498 /****************************************************************************
499 * NEW PATHKEY FORMATION
500 ****************************************************************************/
501
502 /*
503 * build_index_pathkeys
504 * Build a pathkeys list that describes the ordering induced by an index
505 * scan using the given index. (Note that an unordered index doesn't
506 * induce any ordering, so we return NIL.)
507 *
508 * If 'scandir' is BackwardScanDirection, build pathkeys representing a
509 * backwards scan of the index.
510 *
511 * We iterate only key columns of covering indexes, since non-key columns
512 * don't influence index ordering. The result is canonical, meaning that
513 * redundant pathkeys are removed; it may therefore have fewer entries than
514 * there are key columns in the index.
515 *
516 * Another reason for stopping early is that we may be able to tell that
517 * an index column's sort order is uninteresting for this query. However,
518 * that test is just based on the existence of an EquivalenceClass and not
519 * on position in pathkey lists, so it's not complete. Caller should call
520 * truncate_useless_pathkeys() to possibly remove more pathkeys.
521 */
522 List *
build_index_pathkeys(PlannerInfo * root,IndexOptInfo * index,ScanDirection scandir)523 build_index_pathkeys(PlannerInfo *root,
524 IndexOptInfo *index,
525 ScanDirection scandir)
526 {
527 List *retval = NIL;
528 ListCell *lc;
529 int i;
530
531 if (index->sortopfamily == NULL)
532 return NIL; /* non-orderable index */
533
534 i = 0;
535 foreach(lc, index->indextlist)
536 {
537 TargetEntry *indextle = (TargetEntry *) lfirst(lc);
538 Expr *indexkey;
539 bool reverse_sort;
540 bool nulls_first;
541 PathKey *cpathkey;
542
543 /*
544 * INCLUDE columns are stored in index unordered, so they don't
545 * support ordered index scan.
546 */
547 if (i >= index->nkeycolumns)
548 break;
549
550 /* We assume we don't need to make a copy of the tlist item */
551 indexkey = indextle->expr;
552
553 if (ScanDirectionIsBackward(scandir))
554 {
555 reverse_sort = !index->reverse_sort[i];
556 nulls_first = !index->nulls_first[i];
557 }
558 else
559 {
560 reverse_sort = index->reverse_sort[i];
561 nulls_first = index->nulls_first[i];
562 }
563
564 /*
565 * OK, try to make a canonical pathkey for this sort key. Note we're
566 * underneath any outer joins, so nullable_relids should be NULL.
567 */
568 cpathkey = make_pathkey_from_sortinfo(root,
569 indexkey,
570 NULL,
571 index->sortopfamily[i],
572 index->opcintype[i],
573 index->indexcollations[i],
574 reverse_sort,
575 nulls_first,
576 0,
577 index->rel->relids,
578 false);
579
580 if (cpathkey)
581 {
582 /*
583 * We found the sort key in an EquivalenceClass, so it's relevant
584 * for this query. Add it to list, unless it's redundant.
585 */
586 if (!pathkey_is_redundant(cpathkey, retval))
587 retval = lappend(retval, cpathkey);
588 }
589 else
590 {
591 /*
592 * Boolean index keys might be redundant even if they do not
593 * appear in an EquivalenceClass, because of our special treatment
594 * of boolean equality conditions --- see the comment for
595 * indexcol_is_bool_constant_for_query(). If that applies, we can
596 * continue to examine lower-order index columns. Otherwise, the
597 * sort key is not an interesting sort order for this query, so we
598 * should stop considering index columns; any lower-order sort
599 * keys won't be useful either.
600 */
601 if (!indexcol_is_bool_constant_for_query(root, index, i))
602 break;
603 }
604
605 i++;
606 }
607
608 return retval;
609 }
610
611 /*
612 * partkey_is_bool_constant_for_query
613 *
614 * If a partition key column is constrained to have a constant value by the
615 * query's WHERE conditions, then it's irrelevant for sort-order
616 * considerations. Usually that means we have a restriction clause
617 * WHERE partkeycol = constant, which gets turned into an EquivalenceClass
618 * containing a constant, which is recognized as redundant by
619 * build_partition_pathkeys(). But if the partition key column is a
620 * boolean variable (or expression), then we are not going to see such a
621 * WHERE clause, because expression preprocessing will have simplified it
622 * to "WHERE partkeycol" or "WHERE NOT partkeycol". So we are not going
623 * to have a matching EquivalenceClass (unless the query also contains
624 * "ORDER BY partkeycol"). To allow such cases to work the same as they would
625 * for non-boolean values, this function is provided to detect whether the
626 * specified partition key column matches a boolean restriction clause.
627 */
628 static bool
partkey_is_bool_constant_for_query(RelOptInfo * partrel,int partkeycol)629 partkey_is_bool_constant_for_query(RelOptInfo *partrel, int partkeycol)
630 {
631 PartitionScheme partscheme = partrel->part_scheme;
632 ListCell *lc;
633
634 /* If the partkey isn't boolean, we can't possibly get a match */
635 if (!IsBooleanOpfamily(partscheme->partopfamily[partkeycol]))
636 return false;
637
638 /* Check each restriction clause for the partitioned rel */
639 foreach(lc, partrel->baserestrictinfo)
640 {
641 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
642
643 /* Ignore pseudoconstant quals, they won't match */
644 if (rinfo->pseudoconstant)
645 continue;
646
647 /* See if we can match the clause's expression to the partkey column */
648 if (matches_boolean_partition_clause(rinfo, partrel, partkeycol))
649 return true;
650 }
651
652 return false;
653 }
654
655 /*
656 * matches_boolean_partition_clause
657 * Determine if the boolean clause described by rinfo matches
658 * partrel's partkeycol-th partition key column.
659 *
660 * "Matches" can be either an exact match (equivalent to partkey = true),
661 * or a NOT above an exact match (equivalent to partkey = false).
662 */
663 static bool
matches_boolean_partition_clause(RestrictInfo * rinfo,RelOptInfo * partrel,int partkeycol)664 matches_boolean_partition_clause(RestrictInfo *rinfo,
665 RelOptInfo *partrel, int partkeycol)
666 {
667 Node *clause = (Node *) rinfo->clause;
668 Node *partexpr = (Node *) linitial(partrel->partexprs[partkeycol]);
669
670 /* Direct match? */
671 if (equal(partexpr, clause))
672 return true;
673 /* NOT clause? */
674 else if (is_notclause(clause))
675 {
676 Node *arg = (Node *) get_notclausearg((Expr *) clause);
677
678 if (equal(partexpr, arg))
679 return true;
680 }
681
682 return false;
683 }
684
685 /*
686 * build_partition_pathkeys
687 * Build a pathkeys list that describes the ordering induced by the
688 * partitions of partrel, under either forward or backward scan
689 * as per scandir.
690 *
691 * Caller must have checked that the partitions are properly ordered,
692 * as detected by partitions_are_ordered().
693 *
694 * Sets *partialkeys to true if pathkeys were only built for a prefix of the
695 * partition key, or false if the pathkeys include all columns of the
696 * partition key.
697 */
698 List *
build_partition_pathkeys(PlannerInfo * root,RelOptInfo * partrel,ScanDirection scandir,bool * partialkeys)699 build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel,
700 ScanDirection scandir, bool *partialkeys)
701 {
702 List *retval = NIL;
703 PartitionScheme partscheme = partrel->part_scheme;
704 int i;
705
706 Assert(partscheme != NULL);
707 Assert(partitions_are_ordered(partrel->boundinfo, partrel->nparts));
708 /* For now, we can only cope with baserels */
709 Assert(IS_SIMPLE_REL(partrel));
710
711 for (i = 0; i < partscheme->partnatts; i++)
712 {
713 PathKey *cpathkey;
714 Expr *keyCol = (Expr *) linitial(partrel->partexprs[i]);
715
716 /*
717 * Try to make a canonical pathkey for this partkey.
718 *
719 * We're considering a baserel scan, so nullable_relids should be
720 * NULL. Also, we assume the PartitionDesc lists any NULL partition
721 * last, so we treat the scan like a NULLS LAST index: we have
722 * nulls_first for backwards scan only.
723 */
724 cpathkey = make_pathkey_from_sortinfo(root,
725 keyCol,
726 NULL,
727 partscheme->partopfamily[i],
728 partscheme->partopcintype[i],
729 partscheme->partcollation[i],
730 ScanDirectionIsBackward(scandir),
731 ScanDirectionIsBackward(scandir),
732 0,
733 partrel->relids,
734 false);
735
736
737 if (cpathkey)
738 {
739 /*
740 * We found the sort key in an EquivalenceClass, so it's relevant
741 * for this query. Add it to list, unless it's redundant.
742 */
743 if (!pathkey_is_redundant(cpathkey, retval))
744 retval = lappend(retval, cpathkey);
745 }
746 else
747 {
748 /*
749 * Boolean partition keys might be redundant even if they do not
750 * appear in an EquivalenceClass, because of our special treatment
751 * of boolean equality conditions --- see the comment for
752 * partkey_is_bool_constant_for_query(). If that applies, we can
753 * continue to examine lower-order partition keys. Otherwise, the
754 * sort key is not an interesting sort order for this query, so we
755 * should stop considering partition columns; any lower-order sort
756 * keys won't be useful either.
757 */
758 if (!partkey_is_bool_constant_for_query(partrel, i))
759 {
760 *partialkeys = true;
761 return retval;
762 }
763 }
764 }
765
766 *partialkeys = false;
767 return retval;
768 }
769
770 /*
771 * build_expression_pathkey
772 * Build a pathkeys list that describes an ordering by a single expression
773 * using the given sort operator.
774 *
775 * expr, nullable_relids, and rel are as for make_pathkey_from_sortinfo.
776 * We induce the other arguments assuming default sort order for the operator.
777 *
778 * Similarly to make_pathkey_from_sortinfo, the result is NIL if create_it
779 * is false and the expression isn't already in some EquivalenceClass.
780 */
781 List *
build_expression_pathkey(PlannerInfo * root,Expr * expr,Relids nullable_relids,Oid opno,Relids rel,bool create_it)782 build_expression_pathkey(PlannerInfo *root,
783 Expr *expr,
784 Relids nullable_relids,
785 Oid opno,
786 Relids rel,
787 bool create_it)
788 {
789 List *pathkeys;
790 Oid opfamily,
791 opcintype;
792 int16 strategy;
793 PathKey *cpathkey;
794
795 /* Find the operator in pg_amop --- failure shouldn't happen */
796 if (!get_ordering_op_properties(opno,
797 &opfamily, &opcintype, &strategy))
798 elog(ERROR, "operator %u is not a valid ordering operator",
799 opno);
800
801 cpathkey = make_pathkey_from_sortinfo(root,
802 expr,
803 nullable_relids,
804 opfamily,
805 opcintype,
806 exprCollation((Node *) expr),
807 (strategy == BTGreaterStrategyNumber),
808 (strategy == BTGreaterStrategyNumber),
809 0,
810 rel,
811 create_it);
812
813 if (cpathkey)
814 pathkeys = list_make1(cpathkey);
815 else
816 pathkeys = NIL;
817
818 return pathkeys;
819 }
820
821 /*
822 * convert_subquery_pathkeys
823 * Build a pathkeys list that describes the ordering of a subquery's
824 * result, in the terms of the outer query. This is essentially a
825 * task of conversion.
826 *
827 * 'rel': outer query's RelOptInfo for the subquery relation.
828 * 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
829 * 'subquery_tlist': the subquery's output targetlist, in its terms.
830 *
831 * We intentionally don't do truncate_useless_pathkeys() here, because there
832 * are situations where seeing the raw ordering of the subquery is helpful.
833 * For example, if it returns ORDER BY x DESC, that may prompt us to
834 * construct a mergejoin using DESC order rather than ASC order; but the
835 * right_merge_direction heuristic would have us throw the knowledge away.
836 */
837 List *
convert_subquery_pathkeys(PlannerInfo * root,RelOptInfo * rel,List * subquery_pathkeys,List * subquery_tlist)838 convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
839 List *subquery_pathkeys,
840 List *subquery_tlist)
841 {
842 List *retval = NIL;
843 int retvallen = 0;
844 int outer_query_keys = list_length(root->query_pathkeys);
845 ListCell *i;
846
847 foreach(i, subquery_pathkeys)
848 {
849 PathKey *sub_pathkey = (PathKey *) lfirst(i);
850 EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass;
851 PathKey *best_pathkey = NULL;
852
853 if (sub_eclass->ec_has_volatile)
854 {
855 /*
856 * If the sub_pathkey's EquivalenceClass is volatile, then it must
857 * have come from an ORDER BY clause, and we have to match it to
858 * that same targetlist entry.
859 */
860 TargetEntry *tle;
861 Var *outer_var;
862
863 if (sub_eclass->ec_sortref == 0) /* can't happen */
864 elog(ERROR, "volatile EquivalenceClass has no sortref");
865 tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist);
866 Assert(tle);
867 /* Is TLE actually available to the outer query? */
868 outer_var = find_var_for_subquery_tle(rel, tle);
869 if (outer_var)
870 {
871 /* We can represent this sub_pathkey */
872 EquivalenceMember *sub_member;
873 EquivalenceClass *outer_ec;
874
875 Assert(list_length(sub_eclass->ec_members) == 1);
876 sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members);
877
878 /*
879 * Note: it might look funny to be setting sortref = 0 for a
880 * reference to a volatile sub_eclass. However, the
881 * expression is *not* volatile in the outer query: it's just
882 * a Var referencing whatever the subquery emitted. (IOW, the
883 * outer query isn't going to re-execute the volatile
884 * expression itself.) So this is okay. Likewise, it's
885 * correct to pass nullable_relids = NULL, because we're
886 * underneath any outer joins appearing in the outer query.
887 */
888 outer_ec =
889 get_eclass_for_sort_expr(root,
890 (Expr *) outer_var,
891 NULL,
892 sub_eclass->ec_opfamilies,
893 sub_member->em_datatype,
894 sub_eclass->ec_collation,
895 0,
896 rel->relids,
897 false);
898
899 /*
900 * If we don't find a matching EC, sub-pathkey isn't
901 * interesting to the outer query
902 */
903 if (outer_ec)
904 best_pathkey =
905 make_canonical_pathkey(root,
906 outer_ec,
907 sub_pathkey->pk_opfamily,
908 sub_pathkey->pk_strategy,
909 sub_pathkey->pk_nulls_first);
910 }
911 }
912 else
913 {
914 /*
915 * Otherwise, the sub_pathkey's EquivalenceClass could contain
916 * multiple elements (representing knowledge that multiple items
917 * are effectively equal). Each element might match none, one, or
918 * more of the output columns that are visible to the outer query.
919 * This means we may have multiple possible representations of the
920 * sub_pathkey in the context of the outer query. Ideally we
921 * would generate them all and put them all into an EC of the
922 * outer query, thereby propagating equality knowledge up to the
923 * outer query. Right now we cannot do so, because the outer
924 * query's EquivalenceClasses are already frozen when this is
925 * called. Instead we prefer the one that has the highest "score"
926 * (number of EC peers, plus one if it matches the outer
927 * query_pathkeys). This is the most likely to be useful in the
928 * outer query.
929 */
930 int best_score = -1;
931 ListCell *j;
932
933 foreach(j, sub_eclass->ec_members)
934 {
935 EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j);
936 Expr *sub_expr = sub_member->em_expr;
937 Oid sub_expr_type = sub_member->em_datatype;
938 Oid sub_expr_coll = sub_eclass->ec_collation;
939 ListCell *k;
940
941 if (sub_member->em_is_child)
942 continue; /* ignore children here */
943
944 foreach(k, subquery_tlist)
945 {
946 TargetEntry *tle = (TargetEntry *) lfirst(k);
947 Var *outer_var;
948 Expr *tle_expr;
949 EquivalenceClass *outer_ec;
950 PathKey *outer_pk;
951 int score;
952
953 /* Is TLE actually available to the outer query? */
954 outer_var = find_var_for_subquery_tle(rel, tle);
955 if (!outer_var)
956 continue;
957
958 /*
959 * The targetlist entry is considered to match if it
960 * matches after sort-key canonicalization. That is
961 * needed since the sub_expr has been through the same
962 * process.
963 */
964 tle_expr = canonicalize_ec_expression(tle->expr,
965 sub_expr_type,
966 sub_expr_coll);
967 if (!equal(tle_expr, sub_expr))
968 continue;
969
970 /* See if we have a matching EC for the TLE */
971 outer_ec = get_eclass_for_sort_expr(root,
972 (Expr *) outer_var,
973 NULL,
974 sub_eclass->ec_opfamilies,
975 sub_expr_type,
976 sub_expr_coll,
977 0,
978 rel->relids,
979 false);
980
981 /*
982 * If we don't find a matching EC, this sub-pathkey isn't
983 * interesting to the outer query
984 */
985 if (!outer_ec)
986 continue;
987
988 outer_pk = make_canonical_pathkey(root,
989 outer_ec,
990 sub_pathkey->pk_opfamily,
991 sub_pathkey->pk_strategy,
992 sub_pathkey->pk_nulls_first);
993 /* score = # of equivalence peers */
994 score = list_length(outer_ec->ec_members) - 1;
995 /* +1 if it matches the proper query_pathkeys item */
996 if (retvallen < outer_query_keys &&
997 list_nth(root->query_pathkeys, retvallen) == outer_pk)
998 score++;
999 if (score > best_score)
1000 {
1001 best_pathkey = outer_pk;
1002 best_score = score;
1003 }
1004 }
1005 }
1006 }
1007
1008 /*
1009 * If we couldn't find a representation of this sub_pathkey, we're
1010 * done (we can't use the ones to its right, either).
1011 */
1012 if (!best_pathkey)
1013 break;
1014
1015 /*
1016 * Eliminate redundant ordering info; could happen if outer query
1017 * equivalences subquery keys...
1018 */
1019 if (!pathkey_is_redundant(best_pathkey, retval))
1020 {
1021 retval = lappend(retval, best_pathkey);
1022 retvallen++;
1023 }
1024 }
1025
1026 return retval;
1027 }
1028
1029 /*
1030 * find_var_for_subquery_tle
1031 *
1032 * If the given subquery tlist entry is due to be emitted by the subquery's
1033 * scan node, return a Var for it, else return NULL.
1034 *
1035 * We need this to ensure that we don't return pathkeys describing values
1036 * that are unavailable above the level of the subquery scan.
1037 */
1038 static Var *
find_var_for_subquery_tle(RelOptInfo * rel,TargetEntry * tle)1039 find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle)
1040 {
1041 ListCell *lc;
1042
1043 /* If the TLE is resjunk, it's certainly not visible to the outer query */
1044 if (tle->resjunk)
1045 return NULL;
1046
1047 /* Search the rel's targetlist to see what it will return */
1048 foreach(lc, rel->reltarget->exprs)
1049 {
1050 Var *var = (Var *) lfirst(lc);
1051
1052 /* Ignore placeholders */
1053 if (!IsA(var, Var))
1054 continue;
1055 Assert(var->varno == rel->relid);
1056
1057 /* If we find a Var referencing this TLE, we're good */
1058 if (var->varattno == tle->resno)
1059 return copyObject(var); /* Make a copy for safety */
1060 }
1061 return NULL;
1062 }
1063
1064 /*
1065 * build_join_pathkeys
1066 * Build the path keys for a join relation constructed by mergejoin or
1067 * nestloop join. This is normally the same as the outer path's keys.
1068 *
1069 * EXCEPTION: in a FULL or RIGHT join, we cannot treat the result as
1070 * having the outer path's path keys, because null lefthand rows may be
1071 * inserted at random points. It must be treated as unsorted.
1072 *
1073 * We truncate away any pathkeys that are uninteresting for higher joins.
1074 *
1075 * 'joinrel' is the join relation that paths are being formed for
1076 * 'jointype' is the join type (inner, left, full, etc)
1077 * 'outer_pathkeys' is the list of the current outer path's path keys
1078 *
1079 * Returns the list of new path keys.
1080 */
1081 List *
build_join_pathkeys(PlannerInfo * root,RelOptInfo * joinrel,JoinType jointype,List * outer_pathkeys)1082 build_join_pathkeys(PlannerInfo *root,
1083 RelOptInfo *joinrel,
1084 JoinType jointype,
1085 List *outer_pathkeys)
1086 {
1087 if (jointype == JOIN_FULL || jointype == JOIN_RIGHT)
1088 return NIL;
1089
1090 /*
1091 * This used to be quite a complex bit of code, but now that all pathkey
1092 * sublists start out life canonicalized, we don't have to do a darn thing
1093 * here!
1094 *
1095 * We do, however, need to truncate the pathkeys list, since it may
1096 * contain pathkeys that were useful for forming this joinrel but are
1097 * uninteresting to higher levels.
1098 */
1099 return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
1100 }
1101
1102 /****************************************************************************
1103 * PATHKEYS AND SORT CLAUSES
1104 ****************************************************************************/
1105
1106 /*
1107 * make_pathkeys_for_sortclauses
1108 * Generate a pathkeys list that represents the sort order specified
1109 * by a list of SortGroupClauses
1110 *
1111 * The resulting PathKeys are always in canonical form. (Actually, there
1112 * is no longer any code anywhere that creates non-canonical PathKeys.)
1113 *
1114 * We assume that root->nullable_baserels is the set of base relids that could
1115 * have gone to NULL below the SortGroupClause expressions. This is okay if
1116 * the expressions came from the query's top level (ORDER BY, DISTINCT, etc)
1117 * and if this function is only invoked after deconstruct_jointree. In the
1118 * future we might have to make callers pass in the appropriate
1119 * nullable-relids set, but for now it seems unnecessary.
1120 *
1121 * 'sortclauses' is a list of SortGroupClause nodes
1122 * 'tlist' is the targetlist to find the referenced tlist entries in
1123 */
1124 List *
make_pathkeys_for_sortclauses(PlannerInfo * root,List * sortclauses,List * tlist)1125 make_pathkeys_for_sortclauses(PlannerInfo *root,
1126 List *sortclauses,
1127 List *tlist)
1128 {
1129 List *pathkeys = NIL;
1130 ListCell *l;
1131
1132 foreach(l, sortclauses)
1133 {
1134 SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
1135 Expr *sortkey;
1136 PathKey *pathkey;
1137
1138 sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
1139 Assert(OidIsValid(sortcl->sortop));
1140 pathkey = make_pathkey_from_sortop(root,
1141 sortkey,
1142 root->nullable_baserels,
1143 sortcl->sortop,
1144 sortcl->nulls_first,
1145 sortcl->tleSortGroupRef,
1146 true);
1147
1148 /* Canonical form eliminates redundant ordering keys */
1149 if (!pathkey_is_redundant(pathkey, pathkeys))
1150 pathkeys = lappend(pathkeys, pathkey);
1151 }
1152 return pathkeys;
1153 }
1154
1155 /****************************************************************************
1156 * PATHKEYS AND MERGECLAUSES
1157 ****************************************************************************/
1158
1159 /*
1160 * initialize_mergeclause_eclasses
1161 * Set the EquivalenceClass links in a mergeclause restrictinfo.
1162 *
1163 * RestrictInfo contains fields in which we may cache pointers to
1164 * EquivalenceClasses for the left and right inputs of the mergeclause.
1165 * (If the mergeclause is a true equivalence clause these will be the
1166 * same EquivalenceClass, otherwise not.) If the mergeclause is either
1167 * used to generate an EquivalenceClass, or derived from an EquivalenceClass,
1168 * then it's easy to set up the left_ec and right_ec members --- otherwise,
1169 * this function should be called to set them up. We will generate new
1170 * EquivalenceClauses if necessary to represent the mergeclause's left and
1171 * right sides.
1172 *
1173 * Note this is called before EC merging is complete, so the links won't
1174 * necessarily point to canonical ECs. Before they are actually used for
1175 * anything, update_mergeclause_eclasses must be called to ensure that
1176 * they've been updated to point to canonical ECs.
1177 */
1178 void
initialize_mergeclause_eclasses(PlannerInfo * root,RestrictInfo * restrictinfo)1179 initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
1180 {
1181 Expr *clause = restrictinfo->clause;
1182 Oid lefttype,
1183 righttype;
1184
1185 /* Should be a mergeclause ... */
1186 Assert(restrictinfo->mergeopfamilies != NIL);
1187 /* ... with links not yet set */
1188 Assert(restrictinfo->left_ec == NULL);
1189 Assert(restrictinfo->right_ec == NULL);
1190
1191 /* Need the declared input types of the operator */
1192 op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype);
1193
1194 /* Find or create a matching EquivalenceClass for each side */
1195 restrictinfo->left_ec =
1196 get_eclass_for_sort_expr(root,
1197 (Expr *) get_leftop(clause),
1198 restrictinfo->nullable_relids,
1199 restrictinfo->mergeopfamilies,
1200 lefttype,
1201 ((OpExpr *) clause)->inputcollid,
1202 0,
1203 NULL,
1204 true);
1205 restrictinfo->right_ec =
1206 get_eclass_for_sort_expr(root,
1207 (Expr *) get_rightop(clause),
1208 restrictinfo->nullable_relids,
1209 restrictinfo->mergeopfamilies,
1210 righttype,
1211 ((OpExpr *) clause)->inputcollid,
1212 0,
1213 NULL,
1214 true);
1215 }
1216
1217 /*
1218 * update_mergeclause_eclasses
1219 * Make the cached EquivalenceClass links valid in a mergeclause
1220 * restrictinfo.
1221 *
1222 * These pointers should have been set by process_equivalence or
1223 * initialize_mergeclause_eclasses, but they might have been set to
1224 * non-canonical ECs that got merged later. Chase up to the canonical
1225 * merged parent if so.
1226 */
1227 void
update_mergeclause_eclasses(PlannerInfo * root,RestrictInfo * restrictinfo)1228 update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
1229 {
1230 /* Should be a merge clause ... */
1231 Assert(restrictinfo->mergeopfamilies != NIL);
1232 /* ... with pointers already set */
1233 Assert(restrictinfo->left_ec != NULL);
1234 Assert(restrictinfo->right_ec != NULL);
1235
1236 /* Chase up to the top as needed */
1237 while (restrictinfo->left_ec->ec_merged)
1238 restrictinfo->left_ec = restrictinfo->left_ec->ec_merged;
1239 while (restrictinfo->right_ec->ec_merged)
1240 restrictinfo->right_ec = restrictinfo->right_ec->ec_merged;
1241 }
1242
1243 /*
1244 * find_mergeclauses_for_outer_pathkeys
1245 * This routine attempts to find a list of mergeclauses that can be
1246 * used with a specified ordering for the join's outer relation.
1247 * If successful, it returns a list of mergeclauses.
1248 *
1249 * 'pathkeys' is a pathkeys list showing the ordering of an outer-rel path.
1250 * 'restrictinfos' is a list of mergejoinable restriction clauses for the
1251 * join relation being formed, in no particular order.
1252 *
1253 * The restrictinfos must be marked (via outer_is_left) to show which side
1254 * of each clause is associated with the current outer path. (See
1255 * select_mergejoin_clauses())
1256 *
1257 * The result is NIL if no merge can be done, else a maximal list of
1258 * usable mergeclauses (represented as a list of their restrictinfo nodes).
1259 * The list is ordered to match the pathkeys, as required for execution.
1260 */
1261 List *
find_mergeclauses_for_outer_pathkeys(PlannerInfo * root,List * pathkeys,List * restrictinfos)1262 find_mergeclauses_for_outer_pathkeys(PlannerInfo *root,
1263 List *pathkeys,
1264 List *restrictinfos)
1265 {
1266 List *mergeclauses = NIL;
1267 ListCell *i;
1268
1269 /* make sure we have eclasses cached in the clauses */
1270 foreach(i, restrictinfos)
1271 {
1272 RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
1273
1274 update_mergeclause_eclasses(root, rinfo);
1275 }
1276
1277 foreach(i, pathkeys)
1278 {
1279 PathKey *pathkey = (PathKey *) lfirst(i);
1280 EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
1281 List *matched_restrictinfos = NIL;
1282 ListCell *j;
1283
1284 /*----------
1285 * A mergejoin clause matches a pathkey if it has the same EC.
1286 * If there are multiple matching clauses, take them all. In plain
1287 * inner-join scenarios we expect only one match, because
1288 * equivalence-class processing will have removed any redundant
1289 * mergeclauses. However, in outer-join scenarios there might be
1290 * multiple matches. An example is
1291 *
1292 * select * from a full join b
1293 * on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2;
1294 *
1295 * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three
1296 * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed
1297 * we *must* do so or we will be unable to form a valid plan.
1298 *
1299 * We expect that the given pathkeys list is canonical, which means
1300 * no two members have the same EC, so it's not possible for this
1301 * code to enter the same mergeclause into the result list twice.
1302 *
1303 * It's possible that multiple matching clauses might have different
1304 * ECs on the other side, in which case the order we put them into our
1305 * result makes a difference in the pathkeys required for the inner
1306 * input rel. However this routine hasn't got any info about which
1307 * order would be best, so we don't worry about that.
1308 *
1309 * It's also possible that the selected mergejoin clauses produce
1310 * a noncanonical ordering of pathkeys for the inner side, ie, we
1311 * might select clauses that reference b.v1, b.v2, b.v1 in that
1312 * order. This is not harmful in itself, though it suggests that
1313 * the clauses are partially redundant. Since the alternative is
1314 * to omit mergejoin clauses and thereby possibly fail to generate a
1315 * plan altogether, we live with it. make_inner_pathkeys_for_merge()
1316 * has to delete duplicates when it constructs the inner pathkeys
1317 * list, and we also have to deal with such cases specially in
1318 * create_mergejoin_plan().
1319 *----------
1320 */
1321 foreach(j, restrictinfos)
1322 {
1323 RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
1324 EquivalenceClass *clause_ec;
1325
1326 clause_ec = rinfo->outer_is_left ?
1327 rinfo->left_ec : rinfo->right_ec;
1328 if (clause_ec == pathkey_ec)
1329 matched_restrictinfos = lappend(matched_restrictinfos, rinfo);
1330 }
1331
1332 /*
1333 * If we didn't find a mergeclause, we're done --- any additional
1334 * sort-key positions in the pathkeys are useless. (But we can still
1335 * mergejoin if we found at least one mergeclause.)
1336 */
1337 if (matched_restrictinfos == NIL)
1338 break;
1339
1340 /*
1341 * If we did find usable mergeclause(s) for this sort-key position,
1342 * add them to result list.
1343 */
1344 mergeclauses = list_concat(mergeclauses, matched_restrictinfos);
1345 }
1346
1347 return mergeclauses;
1348 }
1349
1350 /*
1351 * select_outer_pathkeys_for_merge
1352 * Builds a pathkey list representing a possible sort ordering
1353 * that can be used with the given mergeclauses.
1354 *
1355 * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
1356 * that will be used in a merge join.
1357 * 'joinrel' is the join relation we are trying to construct.
1358 *
1359 * The restrictinfos must be marked (via outer_is_left) to show which side
1360 * of each clause is associated with the current outer path. (See
1361 * select_mergejoin_clauses())
1362 *
1363 * Returns a pathkeys list that can be applied to the outer relation.
1364 *
1365 * Since we assume here that a sort is required, there is no particular use
1366 * in matching any available ordering of the outerrel. (joinpath.c has an
1367 * entirely separate code path for considering sort-free mergejoins.) Rather,
1368 * it's interesting to try to match the requested query_pathkeys so that a
1369 * second output sort may be avoided; and failing that, we try to list "more
1370 * popular" keys (those with the most unmatched EquivalenceClass peers)
1371 * earlier, in hopes of making the resulting ordering useful for as many
1372 * higher-level mergejoins as possible.
1373 */
1374 List *
select_outer_pathkeys_for_merge(PlannerInfo * root,List * mergeclauses,RelOptInfo * joinrel)1375 select_outer_pathkeys_for_merge(PlannerInfo *root,
1376 List *mergeclauses,
1377 RelOptInfo *joinrel)
1378 {
1379 List *pathkeys = NIL;
1380 int nClauses = list_length(mergeclauses);
1381 EquivalenceClass **ecs;
1382 int *scores;
1383 int necs;
1384 ListCell *lc;
1385 int j;
1386
1387 /* Might have no mergeclauses */
1388 if (nClauses == 0)
1389 return NIL;
1390
1391 /*
1392 * Make arrays of the ECs used by the mergeclauses (dropping any
1393 * duplicates) and their "popularity" scores.
1394 */
1395 ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *));
1396 scores = (int *) palloc(nClauses * sizeof(int));
1397 necs = 0;
1398
1399 foreach(lc, mergeclauses)
1400 {
1401 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1402 EquivalenceClass *oeclass;
1403 int score;
1404 ListCell *lc2;
1405
1406 /* get the outer eclass */
1407 update_mergeclause_eclasses(root, rinfo);
1408
1409 if (rinfo->outer_is_left)
1410 oeclass = rinfo->left_ec;
1411 else
1412 oeclass = rinfo->right_ec;
1413
1414 /* reject duplicates */
1415 for (j = 0; j < necs; j++)
1416 {
1417 if (ecs[j] == oeclass)
1418 break;
1419 }
1420 if (j < necs)
1421 continue;
1422
1423 /* compute score */
1424 score = 0;
1425 foreach(lc2, oeclass->ec_members)
1426 {
1427 EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
1428
1429 /* Potential future join partner? */
1430 if (!em->em_is_const && !em->em_is_child &&
1431 !bms_overlap(em->em_relids, joinrel->relids))
1432 score++;
1433 }
1434
1435 ecs[necs] = oeclass;
1436 scores[necs] = score;
1437 necs++;
1438 }
1439
1440 /*
1441 * Find out if we have all the ECs mentioned in query_pathkeys; if so we
1442 * can generate a sort order that's also useful for final output. There is
1443 * no percentage in a partial match, though, so we have to have 'em all.
1444 */
1445 if (root->query_pathkeys)
1446 {
1447 foreach(lc, root->query_pathkeys)
1448 {
1449 PathKey *query_pathkey = (PathKey *) lfirst(lc);
1450 EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1451
1452 for (j = 0; j < necs; j++)
1453 {
1454 if (ecs[j] == query_ec)
1455 break; /* found match */
1456 }
1457 if (j >= necs)
1458 break; /* didn't find match */
1459 }
1460 /* if we got to the end of the list, we have them all */
1461 if (lc == NULL)
1462 {
1463 /* copy query_pathkeys as starting point for our output */
1464 pathkeys = list_copy(root->query_pathkeys);
1465 /* mark their ECs as already-emitted */
1466 foreach(lc, root->query_pathkeys)
1467 {
1468 PathKey *query_pathkey = (PathKey *) lfirst(lc);
1469 EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1470
1471 for (j = 0; j < necs; j++)
1472 {
1473 if (ecs[j] == query_ec)
1474 {
1475 scores[j] = -1;
1476 break;
1477 }
1478 }
1479 }
1480 }
1481 }
1482
1483 /*
1484 * Add remaining ECs to the list in popularity order, using a default sort
1485 * ordering. (We could use qsort() here, but the list length is usually
1486 * so small it's not worth it.)
1487 */
1488 for (;;)
1489 {
1490 int best_j;
1491 int best_score;
1492 EquivalenceClass *ec;
1493 PathKey *pathkey;
1494
1495 best_j = 0;
1496 best_score = scores[0];
1497 for (j = 1; j < necs; j++)
1498 {
1499 if (scores[j] > best_score)
1500 {
1501 best_j = j;
1502 best_score = scores[j];
1503 }
1504 }
1505 if (best_score < 0)
1506 break; /* all done */
1507 ec = ecs[best_j];
1508 scores[best_j] = -1;
1509 pathkey = make_canonical_pathkey(root,
1510 ec,
1511 linitial_oid(ec->ec_opfamilies),
1512 BTLessStrategyNumber,
1513 false);
1514 /* can't be redundant because no duplicate ECs */
1515 Assert(!pathkey_is_redundant(pathkey, pathkeys));
1516 pathkeys = lappend(pathkeys, pathkey);
1517 }
1518
1519 pfree(ecs);
1520 pfree(scores);
1521
1522 return pathkeys;
1523 }
1524
1525 /*
1526 * make_inner_pathkeys_for_merge
1527 * Builds a pathkey list representing the explicit sort order that
1528 * must be applied to an inner path to make it usable with the
1529 * given mergeclauses.
1530 *
1531 * 'mergeclauses' is a list of RestrictInfos for the mergejoin clauses
1532 * that will be used in a merge join, in order.
1533 * 'outer_pathkeys' are the already-known canonical pathkeys for the outer
1534 * side of the join.
1535 *
1536 * The restrictinfos must be marked (via outer_is_left) to show which side
1537 * of each clause is associated with the current outer path. (See
1538 * select_mergejoin_clauses())
1539 *
1540 * Returns a pathkeys list that can be applied to the inner relation.
1541 *
1542 * Note that it is not this routine's job to decide whether sorting is
1543 * actually needed for a particular input path. Assume a sort is necessary;
1544 * just make the keys, eh?
1545 */
1546 List *
make_inner_pathkeys_for_merge(PlannerInfo * root,List * mergeclauses,List * outer_pathkeys)1547 make_inner_pathkeys_for_merge(PlannerInfo *root,
1548 List *mergeclauses,
1549 List *outer_pathkeys)
1550 {
1551 List *pathkeys = NIL;
1552 EquivalenceClass *lastoeclass;
1553 PathKey *opathkey;
1554 ListCell *lc;
1555 ListCell *lop;
1556
1557 lastoeclass = NULL;
1558 opathkey = NULL;
1559 lop = list_head(outer_pathkeys);
1560
1561 foreach(lc, mergeclauses)
1562 {
1563 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1564 EquivalenceClass *oeclass;
1565 EquivalenceClass *ieclass;
1566 PathKey *pathkey;
1567
1568 update_mergeclause_eclasses(root, rinfo);
1569
1570 if (rinfo->outer_is_left)
1571 {
1572 oeclass = rinfo->left_ec;
1573 ieclass = rinfo->right_ec;
1574 }
1575 else
1576 {
1577 oeclass = rinfo->right_ec;
1578 ieclass = rinfo->left_ec;
1579 }
1580
1581 /* outer eclass should match current or next pathkeys */
1582 /* we check this carefully for debugging reasons */
1583 if (oeclass != lastoeclass)
1584 {
1585 if (!lop)
1586 elog(ERROR, "too few pathkeys for mergeclauses");
1587 opathkey = (PathKey *) lfirst(lop);
1588 lop = lnext(outer_pathkeys, lop);
1589 lastoeclass = opathkey->pk_eclass;
1590 if (oeclass != lastoeclass)
1591 elog(ERROR, "outer pathkeys do not match mergeclause");
1592 }
1593
1594 /*
1595 * Often, we'll have same EC on both sides, in which case the outer
1596 * pathkey is also canonical for the inner side, and we can skip a
1597 * useless search.
1598 */
1599 if (ieclass == oeclass)
1600 pathkey = opathkey;
1601 else
1602 pathkey = make_canonical_pathkey(root,
1603 ieclass,
1604 opathkey->pk_opfamily,
1605 opathkey->pk_strategy,
1606 opathkey->pk_nulls_first);
1607
1608 /*
1609 * Don't generate redundant pathkeys (which can happen if multiple
1610 * mergeclauses refer to the same EC). Because we do this, the output
1611 * pathkey list isn't necessarily ordered like the mergeclauses, which
1612 * complicates life for create_mergejoin_plan(). But if we didn't,
1613 * we'd have a noncanonical sort key list, which would be bad; for one
1614 * reason, it certainly wouldn't match any available sort order for
1615 * the input relation.
1616 */
1617 if (!pathkey_is_redundant(pathkey, pathkeys))
1618 pathkeys = lappend(pathkeys, pathkey);
1619 }
1620
1621 return pathkeys;
1622 }
1623
1624 /*
1625 * trim_mergeclauses_for_inner_pathkeys
1626 * This routine trims a list of mergeclauses to include just those that
1627 * work with a specified ordering for the join's inner relation.
1628 *
1629 * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses for the
1630 * join relation being formed, in an order known to work for the
1631 * currently-considered sort ordering of the join's outer rel.
1632 * 'pathkeys' is a pathkeys list showing the ordering of an inner-rel path;
1633 * it should be equal to, or a truncation of, the result of
1634 * make_inner_pathkeys_for_merge for these mergeclauses.
1635 *
1636 * What we return will be a prefix of the given mergeclauses list.
1637 *
1638 * We need this logic because make_inner_pathkeys_for_merge's result isn't
1639 * necessarily in the same order as the mergeclauses. That means that if we
1640 * consider an inner-rel pathkey list that is a truncation of that result,
1641 * we might need to drop mergeclauses even though they match a surviving inner
1642 * pathkey. This happens when they are to the right of a mergeclause that
1643 * matches a removed inner pathkey.
1644 *
1645 * The mergeclauses must be marked (via outer_is_left) to show which side
1646 * of each clause is associated with the current outer path. (See
1647 * select_mergejoin_clauses())
1648 */
1649 List *
trim_mergeclauses_for_inner_pathkeys(PlannerInfo * root,List * mergeclauses,List * pathkeys)1650 trim_mergeclauses_for_inner_pathkeys(PlannerInfo *root,
1651 List *mergeclauses,
1652 List *pathkeys)
1653 {
1654 List *new_mergeclauses = NIL;
1655 PathKey *pathkey;
1656 EquivalenceClass *pathkey_ec;
1657 bool matched_pathkey;
1658 ListCell *lip;
1659 ListCell *i;
1660
1661 /* No pathkeys => no mergeclauses (though we don't expect this case) */
1662 if (pathkeys == NIL)
1663 return NIL;
1664 /* Initialize to consider first pathkey */
1665 lip = list_head(pathkeys);
1666 pathkey = (PathKey *) lfirst(lip);
1667 pathkey_ec = pathkey->pk_eclass;
1668 lip = lnext(pathkeys, lip);
1669 matched_pathkey = false;
1670
1671 /* Scan mergeclauses to see how many we can use */
1672 foreach(i, mergeclauses)
1673 {
1674 RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
1675 EquivalenceClass *clause_ec;
1676
1677 /* Assume we needn't do update_mergeclause_eclasses again here */
1678
1679 /* Check clause's inner-rel EC against current pathkey */
1680 clause_ec = rinfo->outer_is_left ?
1681 rinfo->right_ec : rinfo->left_ec;
1682
1683 /* If we don't have a match, attempt to advance to next pathkey */
1684 if (clause_ec != pathkey_ec)
1685 {
1686 /* If we had no clauses matching this inner pathkey, must stop */
1687 if (!matched_pathkey)
1688 break;
1689
1690 /* Advance to next inner pathkey, if any */
1691 if (lip == NULL)
1692 break;
1693 pathkey = (PathKey *) lfirst(lip);
1694 pathkey_ec = pathkey->pk_eclass;
1695 lip = lnext(pathkeys, lip);
1696 matched_pathkey = false;
1697 }
1698
1699 /* If mergeclause matches current inner pathkey, we can use it */
1700 if (clause_ec == pathkey_ec)
1701 {
1702 new_mergeclauses = lappend(new_mergeclauses, rinfo);
1703 matched_pathkey = true;
1704 }
1705 else
1706 {
1707 /* Else, no hope of adding any more mergeclauses */
1708 break;
1709 }
1710 }
1711
1712 return new_mergeclauses;
1713 }
1714
1715
1716 /****************************************************************************
1717 * PATHKEY USEFULNESS CHECKS
1718 *
1719 * We only want to remember as many of the pathkeys of a path as have some
1720 * potential use, either for subsequent mergejoins or for meeting the query's
1721 * requested output ordering. This ensures that add_path() won't consider
1722 * a path to have a usefully different ordering unless it really is useful.
1723 * These routines check for usefulness of given pathkeys.
1724 ****************************************************************************/
1725
1726 /*
1727 * pathkeys_useful_for_merging
1728 * Count the number of pathkeys that may be useful for mergejoins
1729 * above the given relation.
1730 *
1731 * We consider a pathkey potentially useful if it corresponds to the merge
1732 * ordering of either side of any joinclause for the rel. This might be
1733 * overoptimistic, since joinclauses that require different other relations
1734 * might never be usable at the same time, but trying to be exact is likely
1735 * to be more trouble than it's worth.
1736 *
1737 * To avoid doubling the number of mergejoin paths considered, we would like
1738 * to consider only one of the two scan directions (ASC or DESC) as useful
1739 * for merging for any given target column. The choice is arbitrary unless
1740 * one of the directions happens to match an ORDER BY key, in which case
1741 * that direction should be preferred, in hopes of avoiding a final sort step.
1742 * right_merge_direction() implements this heuristic.
1743 */
1744 static int
pathkeys_useful_for_merging(PlannerInfo * root,RelOptInfo * rel,List * pathkeys)1745 pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
1746 {
1747 int useful = 0;
1748 ListCell *i;
1749
1750 foreach(i, pathkeys)
1751 {
1752 PathKey *pathkey = (PathKey *) lfirst(i);
1753 bool matched = false;
1754 ListCell *j;
1755
1756 /* If "wrong" direction, not useful for merging */
1757 if (!right_merge_direction(root, pathkey))
1758 break;
1759
1760 /*
1761 * First look into the EquivalenceClass of the pathkey, to see if
1762 * there are any members not yet joined to the rel. If so, it's
1763 * surely possible to generate a mergejoin clause using them.
1764 */
1765 if (rel->has_eclass_joins &&
1766 eclass_useful_for_merging(root, pathkey->pk_eclass, rel))
1767 matched = true;
1768 else
1769 {
1770 /*
1771 * Otherwise search the rel's joininfo list, which contains
1772 * non-EquivalenceClass-derivable join clauses that might
1773 * nonetheless be mergejoinable.
1774 */
1775 foreach(j, rel->joininfo)
1776 {
1777 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
1778
1779 if (restrictinfo->mergeopfamilies == NIL)
1780 continue;
1781 update_mergeclause_eclasses(root, restrictinfo);
1782
1783 if (pathkey->pk_eclass == restrictinfo->left_ec ||
1784 pathkey->pk_eclass == restrictinfo->right_ec)
1785 {
1786 matched = true;
1787 break;
1788 }
1789 }
1790 }
1791
1792 /*
1793 * If we didn't find a mergeclause, we're done --- any additional
1794 * sort-key positions in the pathkeys are useless. (But we can still
1795 * mergejoin if we found at least one mergeclause.)
1796 */
1797 if (matched)
1798 useful++;
1799 else
1800 break;
1801 }
1802
1803 return useful;
1804 }
1805
1806 /*
1807 * right_merge_direction
1808 * Check whether the pathkey embodies the preferred sort direction
1809 * for merging its target column.
1810 */
1811 static bool
right_merge_direction(PlannerInfo * root,PathKey * pathkey)1812 right_merge_direction(PlannerInfo *root, PathKey *pathkey)
1813 {
1814 ListCell *l;
1815
1816 foreach(l, root->query_pathkeys)
1817 {
1818 PathKey *query_pathkey = (PathKey *) lfirst(l);
1819
1820 if (pathkey->pk_eclass == query_pathkey->pk_eclass &&
1821 pathkey->pk_opfamily == query_pathkey->pk_opfamily)
1822 {
1823 /*
1824 * Found a matching query sort column. Prefer this pathkey's
1825 * direction iff it matches. Note that we ignore pk_nulls_first,
1826 * which means that a sort might be needed anyway ... but we still
1827 * want to prefer only one of the two possible directions, and we
1828 * might as well use this one.
1829 */
1830 return (pathkey->pk_strategy == query_pathkey->pk_strategy);
1831 }
1832 }
1833
1834 /* If no matching ORDER BY request, prefer the ASC direction */
1835 return (pathkey->pk_strategy == BTLessStrategyNumber);
1836 }
1837
1838 /*
1839 * pathkeys_useful_for_ordering
1840 * Count the number of pathkeys that are useful for meeting the
1841 * query's requested output ordering.
1842 *
1843 * Because we the have the possibility of incremental sort, a prefix list of
1844 * keys is potentially useful for improving the performance of the requested
1845 * ordering. Thus we return 0, if no valuable keys are found, or the number
1846 * of leading keys shared by the list and the requested ordering..
1847 */
1848 static int
pathkeys_useful_for_ordering(PlannerInfo * root,List * pathkeys)1849 pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
1850 {
1851 int n_common_pathkeys;
1852
1853 if (root->query_pathkeys == NIL)
1854 return 0; /* no special ordering requested */
1855
1856 if (pathkeys == NIL)
1857 return 0; /* unordered path */
1858
1859 (void) pathkeys_count_contained_in(root->query_pathkeys, pathkeys,
1860 &n_common_pathkeys);
1861
1862 return n_common_pathkeys;
1863 }
1864
1865 /*
1866 * truncate_useless_pathkeys
1867 * Shorten the given pathkey list to just the useful pathkeys.
1868 */
1869 List *
truncate_useless_pathkeys(PlannerInfo * root,RelOptInfo * rel,List * pathkeys)1870 truncate_useless_pathkeys(PlannerInfo *root,
1871 RelOptInfo *rel,
1872 List *pathkeys)
1873 {
1874 int nuseful;
1875 int nuseful2;
1876
1877 nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
1878 nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
1879 if (nuseful2 > nuseful)
1880 nuseful = nuseful2;
1881
1882 /*
1883 * Note: not safe to modify input list destructively, but we can avoid
1884 * copying the list if we're not actually going to change it
1885 */
1886 if (nuseful == 0)
1887 return NIL;
1888 else if (nuseful == list_length(pathkeys))
1889 return pathkeys;
1890 else
1891 return list_truncate(list_copy(pathkeys), nuseful);
1892 }
1893
1894 /*
1895 * has_useful_pathkeys
1896 * Detect whether the specified rel could have any pathkeys that are
1897 * useful according to truncate_useless_pathkeys().
1898 *
1899 * This is a cheap test that lets us skip building pathkeys at all in very
1900 * simple queries. It's OK to err in the direction of returning "true" when
1901 * there really aren't any usable pathkeys, but erring in the other direction
1902 * is bad --- so keep this in sync with the routines above!
1903 *
1904 * We could make the test more complex, for example checking to see if any of
1905 * the joinclauses are really mergejoinable, but that likely wouldn't win
1906 * often enough to repay the extra cycles. Queries with neither a join nor
1907 * a sort are reasonably common, though, so this much work seems worthwhile.
1908 */
1909 bool
has_useful_pathkeys(PlannerInfo * root,RelOptInfo * rel)1910 has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
1911 {
1912 if (rel->joininfo != NIL || rel->has_eclass_joins)
1913 return true; /* might be able to use pathkeys for merging */
1914 if (root->query_pathkeys != NIL)
1915 return true; /* might be able to use them for ordering */
1916 return false; /* definitely useless */
1917 }
1918