1 /*-------------------------------------------------------------------------
2 *
3 * pathkeys.c
4 * Utilities for matching and building path keys
5 *
6 * See src/backend/optimizer/README for a great deal of information about
7 * the nature and use of path keys.
8 *
9 *
10 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
12 *
13 * IDENTIFICATION
14 * src/backend/optimizer/path/pathkeys.c
15 *
16 *-------------------------------------------------------------------------
17 */
18 #include "postgres.h"
19
20 #include "access/stratnum.h"
21 #include "nodes/makefuncs.h"
22 #include "nodes/nodeFuncs.h"
23 #include "nodes/plannodes.h"
24 #include "optimizer/clauses.h"
25 #include "optimizer/pathnode.h"
26 #include "optimizer/paths.h"
27 #include "optimizer/tlist.h"
28 #include "utils/lsyscache.h"
29
30
31 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
32 static Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle);
33 static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
34
35
36 /****************************************************************************
37 * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING
38 ****************************************************************************/
39
40 /*
41 * make_canonical_pathkey
42 * Given the parameters for a PathKey, find any pre-existing matching
43 * pathkey in the query's list of "canonical" pathkeys. Make a new
44 * entry if there's not one already.
45 *
46 * Note that this function must not be used until after we have completed
47 * merging EquivalenceClasses. (We don't try to enforce that here; instead,
48 * equivclass.c will complain if a merge occurs after root->canon_pathkeys
49 * has become nonempty.)
50 */
51 PathKey *
make_canonical_pathkey(PlannerInfo * root,EquivalenceClass * eclass,Oid opfamily,int strategy,bool nulls_first)52 make_canonical_pathkey(PlannerInfo *root,
53 EquivalenceClass *eclass, Oid opfamily,
54 int strategy, bool nulls_first)
55 {
56 PathKey *pk;
57 ListCell *lc;
58 MemoryContext oldcontext;
59
60 /* The passed eclass might be non-canonical, so chase up to the top */
61 while (eclass->ec_merged)
62 eclass = eclass->ec_merged;
63
64 foreach(lc, root->canon_pathkeys)
65 {
66 pk = (PathKey *) lfirst(lc);
67 if (eclass == pk->pk_eclass &&
68 opfamily == pk->pk_opfamily &&
69 strategy == pk->pk_strategy &&
70 nulls_first == pk->pk_nulls_first)
71 return pk;
72 }
73
74 /*
75 * Be sure canonical pathkeys are allocated in the main planning context.
76 * Not an issue in normal planning, but it is for GEQO.
77 */
78 oldcontext = MemoryContextSwitchTo(root->planner_cxt);
79
80 pk = makeNode(PathKey);
81 pk->pk_eclass = eclass;
82 pk->pk_opfamily = opfamily;
83 pk->pk_strategy = strategy;
84 pk->pk_nulls_first = nulls_first;
85
86 root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
87
88 MemoryContextSwitchTo(oldcontext);
89
90 return pk;
91 }
92
93 /*
94 * pathkey_is_redundant
95 * Is a pathkey redundant with one already in the given list?
96 *
97 * We detect two cases:
98 *
99 * 1. If the new pathkey's equivalence class contains a constant, and isn't
100 * below an outer join, then we can disregard it as a sort key. An example:
101 * SELECT ... WHERE x = 42 ORDER BY x, y;
102 * We may as well just sort by y. Note that because of opfamily matching,
103 * this is semantically correct: we know that the equality constraint is one
104 * that actually binds the variable to a single value in the terms of any
105 * ordering operator that might go with the eclass. This rule not only lets
106 * us simplify (or even skip) explicit sorts, but also allows matching index
107 * sort orders to a query when there are don't-care index columns.
108 *
109 * 2. If the new pathkey's equivalence class is the same as that of any
110 * existing member of the pathkey list, then it is redundant. Some examples:
111 * SELECT ... ORDER BY x, x;
112 * SELECT ... ORDER BY x, x DESC;
113 * SELECT ... WHERE x = y ORDER BY x, y;
114 * In all these cases the second sort key cannot distinguish values that are
115 * considered equal by the first, and so there's no point in using it.
116 * Note in particular that we need not compare opfamily (all the opfamilies
117 * of the EC have the same notion of equality) nor sort direction.
118 *
119 * Both the given pathkey and the list members must be canonical for this
120 * to work properly, but that's okay since we no longer ever construct any
121 * non-canonical pathkeys. (Note: the notion of a pathkey *list* being
122 * canonical includes the additional requirement of no redundant entries,
123 * which is exactly what we are checking for here.)
124 *
125 * Because the equivclass.c machinery forms only one copy of any EC per query,
126 * pointer comparison is enough to decide whether canonical ECs are the same.
127 */
128 static bool
pathkey_is_redundant(PathKey * new_pathkey,List * pathkeys)129 pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
130 {
131 EquivalenceClass *new_ec = new_pathkey->pk_eclass;
132 ListCell *lc;
133
134 /* Check for EC containing a constant --- unconditionally redundant */
135 if (EC_MUST_BE_REDUNDANT(new_ec))
136 return true;
137
138 /* If same EC already used in list, then redundant */
139 foreach(lc, pathkeys)
140 {
141 PathKey *old_pathkey = (PathKey *) lfirst(lc);
142
143 if (new_ec == old_pathkey->pk_eclass)
144 return true;
145 }
146
147 return false;
148 }
149
150 /*
151 * make_pathkey_from_sortinfo
152 * Given an expression and sort-order information, create a PathKey.
153 * The result is always a "canonical" PathKey, but it might be redundant.
154 *
155 * expr is the expression, and nullable_relids is the set of base relids
156 * that are potentially nullable below it.
157 *
158 * If the PathKey is being generated from a SortGroupClause, sortref should be
159 * the SortGroupClause's SortGroupRef; otherwise zero.
160 *
161 * If rel is not NULL, it identifies a specific relation we're considering
162 * a path for, and indicates that child EC members for that relation can be
163 * considered. Otherwise child members are ignored. (See the comments for
164 * get_eclass_for_sort_expr.)
165 *
166 * create_it is TRUE if we should create any missing EquivalenceClass
167 * needed to represent the sort key. If it's FALSE, we return NULL if the
168 * sort key isn't already present in any EquivalenceClass.
169 */
170 static PathKey *
make_pathkey_from_sortinfo(PlannerInfo * root,Expr * expr,Relids nullable_relids,Oid opfamily,Oid opcintype,Oid collation,bool reverse_sort,bool nulls_first,Index sortref,Relids rel,bool create_it)171 make_pathkey_from_sortinfo(PlannerInfo *root,
172 Expr *expr,
173 Relids nullable_relids,
174 Oid opfamily,
175 Oid opcintype,
176 Oid collation,
177 bool reverse_sort,
178 bool nulls_first,
179 Index sortref,
180 Relids rel,
181 bool create_it)
182 {
183 int16 strategy;
184 Oid equality_op;
185 List *opfamilies;
186 EquivalenceClass *eclass;
187
188 strategy = reverse_sort ? BTGreaterStrategyNumber : BTLessStrategyNumber;
189
190 /*
191 * EquivalenceClasses need to contain opfamily lists based on the family
192 * membership of mergejoinable equality operators, which could belong to
193 * more than one opfamily. So we have to look up the opfamily's equality
194 * operator and get its membership.
195 */
196 equality_op = get_opfamily_member(opfamily,
197 opcintype,
198 opcintype,
199 BTEqualStrategyNumber);
200 if (!OidIsValid(equality_op)) /* shouldn't happen */
201 elog(ERROR, "could not find equality operator for opfamily %u",
202 opfamily);
203 opfamilies = get_mergejoin_opfamilies(equality_op);
204 if (!opfamilies) /* certainly should find some */
205 elog(ERROR, "could not find opfamilies for equality operator %u",
206 equality_op);
207
208 /* Now find or (optionally) create a matching EquivalenceClass */
209 eclass = get_eclass_for_sort_expr(root, expr, nullable_relids,
210 opfamilies, opcintype, collation,
211 sortref, rel, create_it);
212
213 /* Fail if no EC and !create_it */
214 if (!eclass)
215 return NULL;
216
217 /* And finally we can find or create a PathKey node */
218 return make_canonical_pathkey(root, eclass, opfamily,
219 strategy, nulls_first);
220 }
221
222 /*
223 * make_pathkey_from_sortop
224 * Like make_pathkey_from_sortinfo, but work from a sort operator.
225 *
226 * This should eventually go away, but we need to restructure SortGroupClause
227 * first.
228 */
229 static PathKey *
make_pathkey_from_sortop(PlannerInfo * root,Expr * expr,Relids nullable_relids,Oid ordering_op,bool nulls_first,Index sortref,bool create_it)230 make_pathkey_from_sortop(PlannerInfo *root,
231 Expr *expr,
232 Relids nullable_relids,
233 Oid ordering_op,
234 bool nulls_first,
235 Index sortref,
236 bool create_it)
237 {
238 Oid opfamily,
239 opcintype,
240 collation;
241 int16 strategy;
242
243 /* Find the operator in pg_amop --- failure shouldn't happen */
244 if (!get_ordering_op_properties(ordering_op,
245 &opfamily, &opcintype, &strategy))
246 elog(ERROR, "operator %u is not a valid ordering operator",
247 ordering_op);
248
249 /* Because SortGroupClause doesn't carry collation, consult the expr */
250 collation = exprCollation((Node *) expr);
251
252 return make_pathkey_from_sortinfo(root,
253 expr,
254 nullable_relids,
255 opfamily,
256 opcintype,
257 collation,
258 (strategy == BTGreaterStrategyNumber),
259 nulls_first,
260 sortref,
261 NULL,
262 create_it);
263 }
264
265
266 /****************************************************************************
267 * PATHKEY COMPARISONS
268 ****************************************************************************/
269
270 /*
271 * compare_pathkeys
272 * Compare two pathkeys to see if they are equivalent, and if not whether
273 * one is "better" than the other.
274 *
275 * We assume the pathkeys are canonical, and so they can be checked for
276 * equality by simple pointer comparison.
277 */
278 PathKeysComparison
compare_pathkeys(List * keys1,List * keys2)279 compare_pathkeys(List *keys1, List *keys2)
280 {
281 ListCell *key1,
282 *key2;
283
284 /*
285 * Fall out quickly if we are passed two identical lists. This mostly
286 * catches the case where both are NIL, but that's common enough to
287 * warrant the test.
288 */
289 if (keys1 == keys2)
290 return PATHKEYS_EQUAL;
291
292 forboth(key1, keys1, key2, keys2)
293 {
294 PathKey *pathkey1 = (PathKey *) lfirst(key1);
295 PathKey *pathkey2 = (PathKey *) lfirst(key2);
296
297 if (pathkey1 != pathkey2)
298 return PATHKEYS_DIFFERENT; /* no need to keep looking */
299 }
300
301 /*
302 * If we reached the end of only one list, the other is longer and
303 * therefore not a subset.
304 */
305 if (key1 != NULL)
306 return PATHKEYS_BETTER1; /* key1 is longer */
307 if (key2 != NULL)
308 return PATHKEYS_BETTER2; /* key2 is longer */
309 return PATHKEYS_EQUAL;
310 }
311
312 /*
313 * pathkeys_contained_in
314 * Common special case of compare_pathkeys: we just want to know
315 * if keys2 are at least as well sorted as keys1.
316 */
317 bool
pathkeys_contained_in(List * keys1,List * keys2)318 pathkeys_contained_in(List *keys1, List *keys2)
319 {
320 switch (compare_pathkeys(keys1, keys2))
321 {
322 case PATHKEYS_EQUAL:
323 case PATHKEYS_BETTER2:
324 return true;
325 default:
326 break;
327 }
328 return false;
329 }
330
331 /*
332 * get_cheapest_path_for_pathkeys
333 * Find the cheapest path (according to the specified criterion) that
334 * satisfies the given pathkeys and parameterization.
335 * Return NULL if no such path.
336 *
337 * 'paths' is a list of possible paths that all generate the same relation
338 * 'pathkeys' represents a required ordering (in canonical form!)
339 * 'required_outer' denotes allowable outer relations for parameterized paths
340 * 'cost_criterion' is STARTUP_COST or TOTAL_COST
341 */
342 Path *
get_cheapest_path_for_pathkeys(List * paths,List * pathkeys,Relids required_outer,CostSelector cost_criterion)343 get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
344 Relids required_outer,
345 CostSelector cost_criterion)
346 {
347 Path *matched_path = NULL;
348 ListCell *l;
349
350 foreach(l, paths)
351 {
352 Path *path = (Path *) lfirst(l);
353
354 /*
355 * Since cost comparison is a lot cheaper than pathkey comparison, do
356 * that first. (XXX is that still true?)
357 */
358 if (matched_path != NULL &&
359 compare_path_costs(matched_path, path, cost_criterion) <= 0)
360 continue;
361
362 if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
363 bms_is_subset(PATH_REQ_OUTER(path), required_outer))
364 matched_path = path;
365 }
366 return matched_path;
367 }
368
369 /*
370 * get_cheapest_fractional_path_for_pathkeys
371 * Find the cheapest path (for retrieving a specified fraction of all
372 * the tuples) that satisfies the given pathkeys and parameterization.
373 * Return NULL if no such path.
374 *
375 * See compare_fractional_path_costs() for the interpretation of the fraction
376 * parameter.
377 *
378 * 'paths' is a list of possible paths that all generate the same relation
379 * 'pathkeys' represents a required ordering (in canonical form!)
380 * 'required_outer' denotes allowable outer relations for parameterized paths
381 * 'fraction' is the fraction of the total tuples expected to be retrieved
382 */
383 Path *
get_cheapest_fractional_path_for_pathkeys(List * paths,List * pathkeys,Relids required_outer,double fraction)384 get_cheapest_fractional_path_for_pathkeys(List *paths,
385 List *pathkeys,
386 Relids required_outer,
387 double fraction)
388 {
389 Path *matched_path = NULL;
390 ListCell *l;
391
392 foreach(l, paths)
393 {
394 Path *path = (Path *) lfirst(l);
395
396 /*
397 * Since cost comparison is a lot cheaper than pathkey comparison, do
398 * that first. (XXX is that still true?)
399 */
400 if (matched_path != NULL &&
401 compare_fractional_path_costs(matched_path, path, fraction) <= 0)
402 continue;
403
404 if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
405 bms_is_subset(PATH_REQ_OUTER(path), required_outer))
406 matched_path = path;
407 }
408 return matched_path;
409 }
410
411 /****************************************************************************
412 * NEW PATHKEY FORMATION
413 ****************************************************************************/
414
415 /*
416 * build_index_pathkeys
417 * Build a pathkeys list that describes the ordering induced by an index
418 * scan using the given index. (Note that an unordered index doesn't
419 * induce any ordering, so we return NIL.)
420 *
421 * If 'scandir' is BackwardScanDirection, build pathkeys representing a
422 * backwards scan of the index.
423 *
424 * The result is canonical, meaning that redundant pathkeys are removed;
425 * it may therefore have fewer entries than there are index columns.
426 *
427 * Another reason for stopping early is that we may be able to tell that
428 * an index column's sort order is uninteresting for this query. However,
429 * that test is just based on the existence of an EquivalenceClass and not
430 * on position in pathkey lists, so it's not complete. Caller should call
431 * truncate_useless_pathkeys() to possibly remove more pathkeys.
432 */
433 List *
build_index_pathkeys(PlannerInfo * root,IndexOptInfo * index,ScanDirection scandir)434 build_index_pathkeys(PlannerInfo *root,
435 IndexOptInfo *index,
436 ScanDirection scandir)
437 {
438 List *retval = NIL;
439 ListCell *lc;
440 int i;
441
442 if (index->sortopfamily == NULL)
443 return NIL; /* non-orderable index */
444
445 i = 0;
446 foreach(lc, index->indextlist)
447 {
448 TargetEntry *indextle = (TargetEntry *) lfirst(lc);
449 Expr *indexkey;
450 bool reverse_sort;
451 bool nulls_first;
452 PathKey *cpathkey;
453
454 /* We assume we don't need to make a copy of the tlist item */
455 indexkey = indextle->expr;
456
457 if (ScanDirectionIsBackward(scandir))
458 {
459 reverse_sort = !index->reverse_sort[i];
460 nulls_first = !index->nulls_first[i];
461 }
462 else
463 {
464 reverse_sort = index->reverse_sort[i];
465 nulls_first = index->nulls_first[i];
466 }
467
468 /*
469 * OK, try to make a canonical pathkey for this sort key. Note we're
470 * underneath any outer joins, so nullable_relids should be NULL.
471 */
472 cpathkey = make_pathkey_from_sortinfo(root,
473 indexkey,
474 NULL,
475 index->sortopfamily[i],
476 index->opcintype[i],
477 index->indexcollations[i],
478 reverse_sort,
479 nulls_first,
480 0,
481 index->rel->relids,
482 false);
483
484 /*
485 * If the sort key isn't already present in any EquivalenceClass, then
486 * it's not an interesting sort order for this query. So we can stop
487 * now --- lower-order sort keys aren't useful either.
488 */
489 if (!cpathkey)
490 break;
491
492 /* Add to list unless redundant */
493 if (!pathkey_is_redundant(cpathkey, retval))
494 retval = lappend(retval, cpathkey);
495
496 i++;
497 }
498
499 return retval;
500 }
501
502 /*
503 * build_expression_pathkey
504 * Build a pathkeys list that describes an ordering by a single expression
505 * using the given sort operator.
506 *
507 * expr, nullable_relids, and rel are as for make_pathkey_from_sortinfo.
508 * We induce the other arguments assuming default sort order for the operator.
509 *
510 * Similarly to make_pathkey_from_sortinfo, the result is NIL if create_it
511 * is false and the expression isn't already in some EquivalenceClass.
512 */
513 List *
build_expression_pathkey(PlannerInfo * root,Expr * expr,Relids nullable_relids,Oid opno,Relids rel,bool create_it)514 build_expression_pathkey(PlannerInfo *root,
515 Expr *expr,
516 Relids nullable_relids,
517 Oid opno,
518 Relids rel,
519 bool create_it)
520 {
521 List *pathkeys;
522 Oid opfamily,
523 opcintype;
524 int16 strategy;
525 PathKey *cpathkey;
526
527 /* Find the operator in pg_amop --- failure shouldn't happen */
528 if (!get_ordering_op_properties(opno,
529 &opfamily, &opcintype, &strategy))
530 elog(ERROR, "operator %u is not a valid ordering operator",
531 opno);
532
533 cpathkey = make_pathkey_from_sortinfo(root,
534 expr,
535 nullable_relids,
536 opfamily,
537 opcintype,
538 exprCollation((Node *) expr),
539 (strategy == BTGreaterStrategyNumber),
540 (strategy == BTGreaterStrategyNumber),
541 0,
542 rel,
543 create_it);
544
545 if (cpathkey)
546 pathkeys = list_make1(cpathkey);
547 else
548 pathkeys = NIL;
549
550 return pathkeys;
551 }
552
553 /*
554 * convert_subquery_pathkeys
555 * Build a pathkeys list that describes the ordering of a subquery's
556 * result, in the terms of the outer query. This is essentially a
557 * task of conversion.
558 *
559 * 'rel': outer query's RelOptInfo for the subquery relation.
560 * 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
561 * 'subquery_tlist': the subquery's output targetlist, in its terms.
562 *
563 * We intentionally don't do truncate_useless_pathkeys() here, because there
564 * are situations where seeing the raw ordering of the subquery is helpful.
565 * For example, if it returns ORDER BY x DESC, that may prompt us to
566 * construct a mergejoin using DESC order rather than ASC order; but the
567 * right_merge_direction heuristic would have us throw the knowledge away.
568 */
569 List *
convert_subquery_pathkeys(PlannerInfo * root,RelOptInfo * rel,List * subquery_pathkeys,List * subquery_tlist)570 convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
571 List *subquery_pathkeys,
572 List *subquery_tlist)
573 {
574 List *retval = NIL;
575 int retvallen = 0;
576 int outer_query_keys = list_length(root->query_pathkeys);
577 ListCell *i;
578
579 foreach(i, subquery_pathkeys)
580 {
581 PathKey *sub_pathkey = (PathKey *) lfirst(i);
582 EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass;
583 PathKey *best_pathkey = NULL;
584
585 if (sub_eclass->ec_has_volatile)
586 {
587 /*
588 * If the sub_pathkey's EquivalenceClass is volatile, then it must
589 * have come from an ORDER BY clause, and we have to match it to
590 * that same targetlist entry.
591 */
592 TargetEntry *tle;
593 Var *outer_var;
594
595 if (sub_eclass->ec_sortref == 0) /* can't happen */
596 elog(ERROR, "volatile EquivalenceClass has no sortref");
597 tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist);
598 Assert(tle);
599 /* Is TLE actually available to the outer query? */
600 outer_var = find_var_for_subquery_tle(rel, tle);
601 if (outer_var)
602 {
603 /* We can represent this sub_pathkey */
604 EquivalenceMember *sub_member;
605 EquivalenceClass *outer_ec;
606
607 Assert(list_length(sub_eclass->ec_members) == 1);
608 sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members);
609
610 /*
611 * Note: it might look funny to be setting sortref = 0 for a
612 * reference to a volatile sub_eclass. However, the
613 * expression is *not* volatile in the outer query: it's just
614 * a Var referencing whatever the subquery emitted. (IOW, the
615 * outer query isn't going to re-execute the volatile
616 * expression itself.) So this is okay. Likewise, it's
617 * correct to pass nullable_relids = NULL, because we're
618 * underneath any outer joins appearing in the outer query.
619 */
620 outer_ec =
621 get_eclass_for_sort_expr(root,
622 (Expr *) outer_var,
623 NULL,
624 sub_eclass->ec_opfamilies,
625 sub_member->em_datatype,
626 sub_eclass->ec_collation,
627 0,
628 rel->relids,
629 false);
630
631 /*
632 * If we don't find a matching EC, sub-pathkey isn't
633 * interesting to the outer query
634 */
635 if (outer_ec)
636 best_pathkey =
637 make_canonical_pathkey(root,
638 outer_ec,
639 sub_pathkey->pk_opfamily,
640 sub_pathkey->pk_strategy,
641 sub_pathkey->pk_nulls_first);
642 }
643 }
644 else
645 {
646 /*
647 * Otherwise, the sub_pathkey's EquivalenceClass could contain
648 * multiple elements (representing knowledge that multiple items
649 * are effectively equal). Each element might match none, one, or
650 * more of the output columns that are visible to the outer query.
651 * This means we may have multiple possible representations of the
652 * sub_pathkey in the context of the outer query. Ideally we
653 * would generate them all and put them all into an EC of the
654 * outer query, thereby propagating equality knowledge up to the
655 * outer query. Right now we cannot do so, because the outer
656 * query's EquivalenceClasses are already frozen when this is
657 * called. Instead we prefer the one that has the highest "score"
658 * (number of EC peers, plus one if it matches the outer
659 * query_pathkeys). This is the most likely to be useful in the
660 * outer query.
661 */
662 int best_score = -1;
663 ListCell *j;
664
665 foreach(j, sub_eclass->ec_members)
666 {
667 EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j);
668 Expr *sub_expr = sub_member->em_expr;
669 Oid sub_expr_type = sub_member->em_datatype;
670 Oid sub_expr_coll = sub_eclass->ec_collation;
671 ListCell *k;
672
673 if (sub_member->em_is_child)
674 continue; /* ignore children here */
675
676 foreach(k, subquery_tlist)
677 {
678 TargetEntry *tle = (TargetEntry *) lfirst(k);
679 Var *outer_var;
680 Expr *tle_expr;
681 EquivalenceClass *outer_ec;
682 PathKey *outer_pk;
683 int score;
684
685 /* Is TLE actually available to the outer query? */
686 outer_var = find_var_for_subquery_tle(rel, tle);
687 if (!outer_var)
688 continue;
689
690 /*
691 * The targetlist entry is considered to match if it
692 * matches after sort-key canonicalization. That is
693 * needed since the sub_expr has been through the same
694 * process.
695 */
696 tle_expr = canonicalize_ec_expression(tle->expr,
697 sub_expr_type,
698 sub_expr_coll);
699 if (!equal(tle_expr, sub_expr))
700 continue;
701
702 /* See if we have a matching EC for the TLE */
703 outer_ec = get_eclass_for_sort_expr(root,
704 (Expr *) outer_var,
705 NULL,
706 sub_eclass->ec_opfamilies,
707 sub_expr_type,
708 sub_expr_coll,
709 0,
710 rel->relids,
711 false);
712
713 /*
714 * If we don't find a matching EC, this sub-pathkey isn't
715 * interesting to the outer query
716 */
717 if (!outer_ec)
718 continue;
719
720 outer_pk = make_canonical_pathkey(root,
721 outer_ec,
722 sub_pathkey->pk_opfamily,
723 sub_pathkey->pk_strategy,
724 sub_pathkey->pk_nulls_first);
725 /* score = # of equivalence peers */
726 score = list_length(outer_ec->ec_members) - 1;
727 /* +1 if it matches the proper query_pathkeys item */
728 if (retvallen < outer_query_keys &&
729 list_nth(root->query_pathkeys, retvallen) == outer_pk)
730 score++;
731 if (score > best_score)
732 {
733 best_pathkey = outer_pk;
734 best_score = score;
735 }
736 }
737 }
738 }
739
740 /*
741 * If we couldn't find a representation of this sub_pathkey, we're
742 * done (we can't use the ones to its right, either).
743 */
744 if (!best_pathkey)
745 break;
746
747 /*
748 * Eliminate redundant ordering info; could happen if outer query
749 * equivalences subquery keys...
750 */
751 if (!pathkey_is_redundant(best_pathkey, retval))
752 {
753 retval = lappend(retval, best_pathkey);
754 retvallen++;
755 }
756 }
757
758 return retval;
759 }
760
761 /*
762 * find_var_for_subquery_tle
763 *
764 * If the given subquery tlist entry is due to be emitted by the subquery's
765 * scan node, return a Var for it, else return NULL.
766 *
767 * We need this to ensure that we don't return pathkeys describing values
768 * that are unavailable above the level of the subquery scan.
769 */
770 static Var *
find_var_for_subquery_tle(RelOptInfo * rel,TargetEntry * tle)771 find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle)
772 {
773 ListCell *lc;
774
775 /* If the TLE is resjunk, it's certainly not visible to the outer query */
776 if (tle->resjunk)
777 return NULL;
778
779 /* Search the rel's targetlist to see what it will return */
780 foreach(lc, rel->reltarget->exprs)
781 {
782 Var *var = (Var *) lfirst(lc);
783
784 /* Ignore placeholders */
785 if (!IsA(var, Var))
786 continue;
787 Assert(var->varno == rel->relid);
788
789 /* If we find a Var referencing this TLE, we're good */
790 if (var->varattno == tle->resno)
791 return copyObject(var); /* Make a copy for safety */
792 }
793 return NULL;
794 }
795
796 /*
797 * build_join_pathkeys
798 * Build the path keys for a join relation constructed by mergejoin or
799 * nestloop join. This is normally the same as the outer path's keys.
800 *
801 * EXCEPTION: in a FULL or RIGHT join, we cannot treat the result as
802 * having the outer path's path keys, because null lefthand rows may be
803 * inserted at random points. It must be treated as unsorted.
804 *
805 * We truncate away any pathkeys that are uninteresting for higher joins.
806 *
807 * 'joinrel' is the join relation that paths are being formed for
808 * 'jointype' is the join type (inner, left, full, etc)
809 * 'outer_pathkeys' is the list of the current outer path's path keys
810 *
811 * Returns the list of new path keys.
812 */
813 List *
build_join_pathkeys(PlannerInfo * root,RelOptInfo * joinrel,JoinType jointype,List * outer_pathkeys)814 build_join_pathkeys(PlannerInfo *root,
815 RelOptInfo *joinrel,
816 JoinType jointype,
817 List *outer_pathkeys)
818 {
819 if (jointype == JOIN_FULL || jointype == JOIN_RIGHT)
820 return NIL;
821
822 /*
823 * This used to be quite a complex bit of code, but now that all pathkey
824 * sublists start out life canonicalized, we don't have to do a darn thing
825 * here!
826 *
827 * We do, however, need to truncate the pathkeys list, since it may
828 * contain pathkeys that were useful for forming this joinrel but are
829 * uninteresting to higher levels.
830 */
831 return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
832 }
833
834 /****************************************************************************
835 * PATHKEYS AND SORT CLAUSES
836 ****************************************************************************/
837
838 /*
839 * make_pathkeys_for_sortclauses
840 * Generate a pathkeys list that represents the sort order specified
841 * by a list of SortGroupClauses
842 *
843 * The resulting PathKeys are always in canonical form. (Actually, there
844 * is no longer any code anywhere that creates non-canonical PathKeys.)
845 *
846 * We assume that root->nullable_baserels is the set of base relids that could
847 * have gone to NULL below the SortGroupClause expressions. This is okay if
848 * the expressions came from the query's top level (ORDER BY, DISTINCT, etc)
849 * and if this function is only invoked after deconstruct_jointree. In the
850 * future we might have to make callers pass in the appropriate
851 * nullable-relids set, but for now it seems unnecessary.
852 *
853 * 'sortclauses' is a list of SortGroupClause nodes
854 * 'tlist' is the targetlist to find the referenced tlist entries in
855 */
856 List *
make_pathkeys_for_sortclauses(PlannerInfo * root,List * sortclauses,List * tlist)857 make_pathkeys_for_sortclauses(PlannerInfo *root,
858 List *sortclauses,
859 List *tlist)
860 {
861 List *pathkeys = NIL;
862 ListCell *l;
863
864 foreach(l, sortclauses)
865 {
866 SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
867 Expr *sortkey;
868 PathKey *pathkey;
869
870 sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
871 Assert(OidIsValid(sortcl->sortop));
872 pathkey = make_pathkey_from_sortop(root,
873 sortkey,
874 root->nullable_baserels,
875 sortcl->sortop,
876 sortcl->nulls_first,
877 sortcl->tleSortGroupRef,
878 true);
879
880 /* Canonical form eliminates redundant ordering keys */
881 if (!pathkey_is_redundant(pathkey, pathkeys))
882 pathkeys = lappend(pathkeys, pathkey);
883 }
884 return pathkeys;
885 }
886
887 /****************************************************************************
888 * PATHKEYS AND MERGECLAUSES
889 ****************************************************************************/
890
891 /*
892 * initialize_mergeclause_eclasses
893 * Set the EquivalenceClass links in a mergeclause restrictinfo.
894 *
895 * RestrictInfo contains fields in which we may cache pointers to
896 * EquivalenceClasses for the left and right inputs of the mergeclause.
897 * (If the mergeclause is a true equivalence clause these will be the
898 * same EquivalenceClass, otherwise not.) If the mergeclause is either
899 * used to generate an EquivalenceClass, or derived from an EquivalenceClass,
900 * then it's easy to set up the left_ec and right_ec members --- otherwise,
901 * this function should be called to set them up. We will generate new
902 * EquivalenceClauses if necessary to represent the mergeclause's left and
903 * right sides.
904 *
905 * Note this is called before EC merging is complete, so the links won't
906 * necessarily point to canonical ECs. Before they are actually used for
907 * anything, update_mergeclause_eclasses must be called to ensure that
908 * they've been updated to point to canonical ECs.
909 */
910 void
initialize_mergeclause_eclasses(PlannerInfo * root,RestrictInfo * restrictinfo)911 initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
912 {
913 Expr *clause = restrictinfo->clause;
914 Oid lefttype,
915 righttype;
916
917 /* Should be a mergeclause ... */
918 Assert(restrictinfo->mergeopfamilies != NIL);
919 /* ... with links not yet set */
920 Assert(restrictinfo->left_ec == NULL);
921 Assert(restrictinfo->right_ec == NULL);
922
923 /* Need the declared input types of the operator */
924 op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype);
925
926 /* Find or create a matching EquivalenceClass for each side */
927 restrictinfo->left_ec =
928 get_eclass_for_sort_expr(root,
929 (Expr *) get_leftop(clause),
930 restrictinfo->nullable_relids,
931 restrictinfo->mergeopfamilies,
932 lefttype,
933 ((OpExpr *) clause)->inputcollid,
934 0,
935 NULL,
936 true);
937 restrictinfo->right_ec =
938 get_eclass_for_sort_expr(root,
939 (Expr *) get_rightop(clause),
940 restrictinfo->nullable_relids,
941 restrictinfo->mergeopfamilies,
942 righttype,
943 ((OpExpr *) clause)->inputcollid,
944 0,
945 NULL,
946 true);
947 }
948
949 /*
950 * update_mergeclause_eclasses
951 * Make the cached EquivalenceClass links valid in a mergeclause
952 * restrictinfo.
953 *
954 * These pointers should have been set by process_equivalence or
955 * initialize_mergeclause_eclasses, but they might have been set to
956 * non-canonical ECs that got merged later. Chase up to the canonical
957 * merged parent if so.
958 */
959 void
update_mergeclause_eclasses(PlannerInfo * root,RestrictInfo * restrictinfo)960 update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
961 {
962 /* Should be a merge clause ... */
963 Assert(restrictinfo->mergeopfamilies != NIL);
964 /* ... with pointers already set */
965 Assert(restrictinfo->left_ec != NULL);
966 Assert(restrictinfo->right_ec != NULL);
967
968 /* Chase up to the top as needed */
969 while (restrictinfo->left_ec->ec_merged)
970 restrictinfo->left_ec = restrictinfo->left_ec->ec_merged;
971 while (restrictinfo->right_ec->ec_merged)
972 restrictinfo->right_ec = restrictinfo->right_ec->ec_merged;
973 }
974
975 /*
976 * find_mergeclauses_for_outer_pathkeys
977 * This routine attempts to find a list of mergeclauses that can be
978 * used with a specified ordering for the join's outer relation.
979 * If successful, it returns a list of mergeclauses.
980 *
981 * 'pathkeys' is a pathkeys list showing the ordering of an outer-rel path.
982 * 'restrictinfos' is a list of mergejoinable restriction clauses for the
983 * join relation being formed, in no particular order.
984 *
985 * The restrictinfos must be marked (via outer_is_left) to show which side
986 * of each clause is associated with the current outer path. (See
987 * select_mergejoin_clauses())
988 *
989 * The result is NIL if no merge can be done, else a maximal list of
990 * usable mergeclauses (represented as a list of their restrictinfo nodes).
991 * The list is ordered to match the pathkeys, as required for execution.
992 */
993 List *
find_mergeclauses_for_outer_pathkeys(PlannerInfo * root,List * pathkeys,List * restrictinfos)994 find_mergeclauses_for_outer_pathkeys(PlannerInfo *root,
995 List *pathkeys,
996 List *restrictinfos)
997 {
998 List *mergeclauses = NIL;
999 ListCell *i;
1000
1001 /* make sure we have eclasses cached in the clauses */
1002 foreach(i, restrictinfos)
1003 {
1004 RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
1005
1006 update_mergeclause_eclasses(root, rinfo);
1007 }
1008
1009 foreach(i, pathkeys)
1010 {
1011 PathKey *pathkey = (PathKey *) lfirst(i);
1012 EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
1013 List *matched_restrictinfos = NIL;
1014 ListCell *j;
1015
1016 /*----------
1017 * A mergejoin clause matches a pathkey if it has the same EC.
1018 * If there are multiple matching clauses, take them all. In plain
1019 * inner-join scenarios we expect only one match, because
1020 * equivalence-class processing will have removed any redundant
1021 * mergeclauses. However, in outer-join scenarios there might be
1022 * multiple matches. An example is
1023 *
1024 * select * from a full join b
1025 * on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2;
1026 *
1027 * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three
1028 * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed
1029 * we *must* do so or we will be unable to form a valid plan.
1030 *
1031 * We expect that the given pathkeys list is canonical, which means
1032 * no two members have the same EC, so it's not possible for this
1033 * code to enter the same mergeclause into the result list twice.
1034 *
1035 * It's possible that multiple matching clauses might have different
1036 * ECs on the other side, in which case the order we put them into our
1037 * result makes a difference in the pathkeys required for the inner
1038 * input rel. However this routine hasn't got any info about which
1039 * order would be best, so we don't worry about that.
1040 *
1041 * It's also possible that the selected mergejoin clauses produce
1042 * a noncanonical ordering of pathkeys for the inner side, ie, we
1043 * might select clauses that reference b.v1, b.v2, b.v1 in that
1044 * order. This is not harmful in itself, though it suggests that
1045 * the clauses are partially redundant. Since the alternative is
1046 * to omit mergejoin clauses and thereby possibly fail to generate a
1047 * plan altogether, we live with it. make_inner_pathkeys_for_merge()
1048 * has to delete duplicates when it constructs the inner pathkeys
1049 * list, and we also have to deal with such cases specially in
1050 * create_mergejoin_plan().
1051 *----------
1052 */
1053 foreach(j, restrictinfos)
1054 {
1055 RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
1056 EquivalenceClass *clause_ec;
1057
1058 clause_ec = rinfo->outer_is_left ?
1059 rinfo->left_ec : rinfo->right_ec;
1060 if (clause_ec == pathkey_ec)
1061 matched_restrictinfos = lappend(matched_restrictinfos, rinfo);
1062 }
1063
1064 /*
1065 * If we didn't find a mergeclause, we're done --- any additional
1066 * sort-key positions in the pathkeys are useless. (But we can still
1067 * mergejoin if we found at least one mergeclause.)
1068 */
1069 if (matched_restrictinfos == NIL)
1070 break;
1071
1072 /*
1073 * If we did find usable mergeclause(s) for this sort-key position,
1074 * add them to result list.
1075 */
1076 mergeclauses = list_concat(mergeclauses, matched_restrictinfos);
1077 }
1078
1079 return mergeclauses;
1080 }
1081
1082 /*
1083 * select_outer_pathkeys_for_merge
1084 * Builds a pathkey list representing a possible sort ordering
1085 * that can be used with the given mergeclauses.
1086 *
1087 * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
1088 * that will be used in a merge join.
1089 * 'joinrel' is the join relation we are trying to construct.
1090 *
1091 * The restrictinfos must be marked (via outer_is_left) to show which side
1092 * of each clause is associated with the current outer path. (See
1093 * select_mergejoin_clauses())
1094 *
1095 * Returns a pathkeys list that can be applied to the outer relation.
1096 *
1097 * Since we assume here that a sort is required, there is no particular use
1098 * in matching any available ordering of the outerrel. (joinpath.c has an
1099 * entirely separate code path for considering sort-free mergejoins.) Rather,
1100 * it's interesting to try to match the requested query_pathkeys so that a
1101 * second output sort may be avoided; and failing that, we try to list "more
1102 * popular" keys (those with the most unmatched EquivalenceClass peers)
1103 * earlier, in hopes of making the resulting ordering useful for as many
1104 * higher-level mergejoins as possible.
1105 */
1106 List *
select_outer_pathkeys_for_merge(PlannerInfo * root,List * mergeclauses,RelOptInfo * joinrel)1107 select_outer_pathkeys_for_merge(PlannerInfo *root,
1108 List *mergeclauses,
1109 RelOptInfo *joinrel)
1110 {
1111 List *pathkeys = NIL;
1112 int nClauses = list_length(mergeclauses);
1113 EquivalenceClass **ecs;
1114 int *scores;
1115 int necs;
1116 ListCell *lc;
1117 int j;
1118
1119 /* Might have no mergeclauses */
1120 if (nClauses == 0)
1121 return NIL;
1122
1123 /*
1124 * Make arrays of the ECs used by the mergeclauses (dropping any
1125 * duplicates) and their "popularity" scores.
1126 */
1127 ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *));
1128 scores = (int *) palloc(nClauses * sizeof(int));
1129 necs = 0;
1130
1131 foreach(lc, mergeclauses)
1132 {
1133 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1134 EquivalenceClass *oeclass;
1135 int score;
1136 ListCell *lc2;
1137
1138 /* get the outer eclass */
1139 update_mergeclause_eclasses(root, rinfo);
1140
1141 if (rinfo->outer_is_left)
1142 oeclass = rinfo->left_ec;
1143 else
1144 oeclass = rinfo->right_ec;
1145
1146 /* reject duplicates */
1147 for (j = 0; j < necs; j++)
1148 {
1149 if (ecs[j] == oeclass)
1150 break;
1151 }
1152 if (j < necs)
1153 continue;
1154
1155 /* compute score */
1156 score = 0;
1157 foreach(lc2, oeclass->ec_members)
1158 {
1159 EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
1160
1161 /* Potential future join partner? */
1162 if (!em->em_is_const && !em->em_is_child &&
1163 !bms_overlap(em->em_relids, joinrel->relids))
1164 score++;
1165 }
1166
1167 ecs[necs] = oeclass;
1168 scores[necs] = score;
1169 necs++;
1170 }
1171
1172 /*
1173 * Find out if we have all the ECs mentioned in query_pathkeys; if so we
1174 * can generate a sort order that's also useful for final output. There is
1175 * no percentage in a partial match, though, so we have to have 'em all.
1176 */
1177 if (root->query_pathkeys)
1178 {
1179 foreach(lc, root->query_pathkeys)
1180 {
1181 PathKey *query_pathkey = (PathKey *) lfirst(lc);
1182 EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1183
1184 for (j = 0; j < necs; j++)
1185 {
1186 if (ecs[j] == query_ec)
1187 break; /* found match */
1188 }
1189 if (j >= necs)
1190 break; /* didn't find match */
1191 }
1192 /* if we got to the end of the list, we have them all */
1193 if (lc == NULL)
1194 {
1195 /* copy query_pathkeys as starting point for our output */
1196 pathkeys = list_copy(root->query_pathkeys);
1197 /* mark their ECs as already-emitted */
1198 foreach(lc, root->query_pathkeys)
1199 {
1200 PathKey *query_pathkey = (PathKey *) lfirst(lc);
1201 EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1202
1203 for (j = 0; j < necs; j++)
1204 {
1205 if (ecs[j] == query_ec)
1206 {
1207 scores[j] = -1;
1208 break;
1209 }
1210 }
1211 }
1212 }
1213 }
1214
1215 /*
1216 * Add remaining ECs to the list in popularity order, using a default sort
1217 * ordering. (We could use qsort() here, but the list length is usually
1218 * so small it's not worth it.)
1219 */
1220 for (;;)
1221 {
1222 int best_j;
1223 int best_score;
1224 EquivalenceClass *ec;
1225 PathKey *pathkey;
1226
1227 best_j = 0;
1228 best_score = scores[0];
1229 for (j = 1; j < necs; j++)
1230 {
1231 if (scores[j] > best_score)
1232 {
1233 best_j = j;
1234 best_score = scores[j];
1235 }
1236 }
1237 if (best_score < 0)
1238 break; /* all done */
1239 ec = ecs[best_j];
1240 scores[best_j] = -1;
1241 pathkey = make_canonical_pathkey(root,
1242 ec,
1243 linitial_oid(ec->ec_opfamilies),
1244 BTLessStrategyNumber,
1245 false);
1246 /* can't be redundant because no duplicate ECs */
1247 Assert(!pathkey_is_redundant(pathkey, pathkeys));
1248 pathkeys = lappend(pathkeys, pathkey);
1249 }
1250
1251 pfree(ecs);
1252 pfree(scores);
1253
1254 return pathkeys;
1255 }
1256
1257 /*
1258 * make_inner_pathkeys_for_merge
1259 * Builds a pathkey list representing the explicit sort order that
1260 * must be applied to an inner path to make it usable with the
1261 * given mergeclauses.
1262 *
1263 * 'mergeclauses' is a list of RestrictInfos for the mergejoin clauses
1264 * that will be used in a merge join, in order.
1265 * 'outer_pathkeys' are the already-known canonical pathkeys for the outer
1266 * side of the join.
1267 *
1268 * The restrictinfos must be marked (via outer_is_left) to show which side
1269 * of each clause is associated with the current outer path. (See
1270 * select_mergejoin_clauses())
1271 *
1272 * Returns a pathkeys list that can be applied to the inner relation.
1273 *
1274 * Note that it is not this routine's job to decide whether sorting is
1275 * actually needed for a particular input path. Assume a sort is necessary;
1276 * just make the keys, eh?
1277 */
1278 List *
make_inner_pathkeys_for_merge(PlannerInfo * root,List * mergeclauses,List * outer_pathkeys)1279 make_inner_pathkeys_for_merge(PlannerInfo *root,
1280 List *mergeclauses,
1281 List *outer_pathkeys)
1282 {
1283 List *pathkeys = NIL;
1284 EquivalenceClass *lastoeclass;
1285 PathKey *opathkey;
1286 ListCell *lc;
1287 ListCell *lop;
1288
1289 lastoeclass = NULL;
1290 opathkey = NULL;
1291 lop = list_head(outer_pathkeys);
1292
1293 foreach(lc, mergeclauses)
1294 {
1295 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1296 EquivalenceClass *oeclass;
1297 EquivalenceClass *ieclass;
1298 PathKey *pathkey;
1299
1300 update_mergeclause_eclasses(root, rinfo);
1301
1302 if (rinfo->outer_is_left)
1303 {
1304 oeclass = rinfo->left_ec;
1305 ieclass = rinfo->right_ec;
1306 }
1307 else
1308 {
1309 oeclass = rinfo->right_ec;
1310 ieclass = rinfo->left_ec;
1311 }
1312
1313 /* outer eclass should match current or next pathkeys */
1314 /* we check this carefully for debugging reasons */
1315 if (oeclass != lastoeclass)
1316 {
1317 if (!lop)
1318 elog(ERROR, "too few pathkeys for mergeclauses");
1319 opathkey = (PathKey *) lfirst(lop);
1320 lop = lnext(lop);
1321 lastoeclass = opathkey->pk_eclass;
1322 if (oeclass != lastoeclass)
1323 elog(ERROR, "outer pathkeys do not match mergeclause");
1324 }
1325
1326 /*
1327 * Often, we'll have same EC on both sides, in which case the outer
1328 * pathkey is also canonical for the inner side, and we can skip a
1329 * useless search.
1330 */
1331 if (ieclass == oeclass)
1332 pathkey = opathkey;
1333 else
1334 pathkey = make_canonical_pathkey(root,
1335 ieclass,
1336 opathkey->pk_opfamily,
1337 opathkey->pk_strategy,
1338 opathkey->pk_nulls_first);
1339
1340 /*
1341 * Don't generate redundant pathkeys (which can happen if multiple
1342 * mergeclauses refer to the same EC). Because we do this, the output
1343 * pathkey list isn't necessarily ordered like the mergeclauses, which
1344 * complicates life for create_mergejoin_plan(). But if we didn't,
1345 * we'd have a noncanonical sort key list, which would be bad; for one
1346 * reason, it certainly wouldn't match any available sort order for
1347 * the input relation.
1348 */
1349 if (!pathkey_is_redundant(pathkey, pathkeys))
1350 pathkeys = lappend(pathkeys, pathkey);
1351 }
1352
1353 return pathkeys;
1354 }
1355
1356 /*
1357 * trim_mergeclauses_for_inner_pathkeys
1358 * This routine trims a list of mergeclauses to include just those that
1359 * work with a specified ordering for the join's inner relation.
1360 *
1361 * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses for the
1362 * join relation being formed, in an order known to work for the
1363 * currently-considered sort ordering of the join's outer rel.
1364 * 'pathkeys' is a pathkeys list showing the ordering of an inner-rel path;
1365 * it should be equal to, or a truncation of, the result of
1366 * make_inner_pathkeys_for_merge for these mergeclauses.
1367 *
1368 * What we return will be a prefix of the given mergeclauses list.
1369 *
1370 * We need this logic because make_inner_pathkeys_for_merge's result isn't
1371 * necessarily in the same order as the mergeclauses. That means that if we
1372 * consider an inner-rel pathkey list that is a truncation of that result,
1373 * we might need to drop mergeclauses even though they match a surviving inner
1374 * pathkey. This happens when they are to the right of a mergeclause that
1375 * matches a removed inner pathkey.
1376 *
1377 * The mergeclauses must be marked (via outer_is_left) to show which side
1378 * of each clause is associated with the current outer path. (See
1379 * select_mergejoin_clauses())
1380 */
1381 List *
trim_mergeclauses_for_inner_pathkeys(PlannerInfo * root,List * mergeclauses,List * pathkeys)1382 trim_mergeclauses_for_inner_pathkeys(PlannerInfo *root,
1383 List *mergeclauses,
1384 List *pathkeys)
1385 {
1386 List *new_mergeclauses = NIL;
1387 PathKey *pathkey;
1388 EquivalenceClass *pathkey_ec;
1389 bool matched_pathkey;
1390 ListCell *lip;
1391 ListCell *i;
1392
1393 /* No pathkeys => no mergeclauses (though we don't expect this case) */
1394 if (pathkeys == NIL)
1395 return NIL;
1396 /* Initialize to consider first pathkey */
1397 lip = list_head(pathkeys);
1398 pathkey = (PathKey *) lfirst(lip);
1399 pathkey_ec = pathkey->pk_eclass;
1400 lip = lnext(lip);
1401 matched_pathkey = false;
1402
1403 /* Scan mergeclauses to see how many we can use */
1404 foreach(i, mergeclauses)
1405 {
1406 RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
1407 EquivalenceClass *clause_ec;
1408
1409 /* Assume we needn't do update_mergeclause_eclasses again here */
1410
1411 /* Check clause's inner-rel EC against current pathkey */
1412 clause_ec = rinfo->outer_is_left ?
1413 rinfo->right_ec : rinfo->left_ec;
1414
1415 /* If we don't have a match, attempt to advance to next pathkey */
1416 if (clause_ec != pathkey_ec)
1417 {
1418 /* If we had no clauses matching this inner pathkey, must stop */
1419 if (!matched_pathkey)
1420 break;
1421
1422 /* Advance to next inner pathkey, if any */
1423 if (lip == NULL)
1424 break;
1425 pathkey = (PathKey *) lfirst(lip);
1426 pathkey_ec = pathkey->pk_eclass;
1427 lip = lnext(lip);
1428 matched_pathkey = false;
1429 }
1430
1431 /* If mergeclause matches current inner pathkey, we can use it */
1432 if (clause_ec == pathkey_ec)
1433 {
1434 new_mergeclauses = lappend(new_mergeclauses, rinfo);
1435 matched_pathkey = true;
1436 }
1437 else
1438 {
1439 /* Else, no hope of adding any more mergeclauses */
1440 break;
1441 }
1442 }
1443
1444 return new_mergeclauses;
1445 }
1446
1447
1448 /****************************************************************************
1449 * PATHKEY USEFULNESS CHECKS
1450 *
1451 * We only want to remember as many of the pathkeys of a path as have some
1452 * potential use, either for subsequent mergejoins or for meeting the query's
1453 * requested output ordering. This ensures that add_path() won't consider
1454 * a path to have a usefully different ordering unless it really is useful.
1455 * These routines check for usefulness of given pathkeys.
1456 ****************************************************************************/
1457
1458 /*
1459 * pathkeys_useful_for_merging
1460 * Count the number of pathkeys that may be useful for mergejoins
1461 * above the given relation.
1462 *
1463 * We consider a pathkey potentially useful if it corresponds to the merge
1464 * ordering of either side of any joinclause for the rel. This might be
1465 * overoptimistic, since joinclauses that require different other relations
1466 * might never be usable at the same time, but trying to be exact is likely
1467 * to be more trouble than it's worth.
1468 *
1469 * To avoid doubling the number of mergejoin paths considered, we would like
1470 * to consider only one of the two scan directions (ASC or DESC) as useful
1471 * for merging for any given target column. The choice is arbitrary unless
1472 * one of the directions happens to match an ORDER BY key, in which case
1473 * that direction should be preferred, in hopes of avoiding a final sort step.
1474 * right_merge_direction() implements this heuristic.
1475 */
1476 static int
pathkeys_useful_for_merging(PlannerInfo * root,RelOptInfo * rel,List * pathkeys)1477 pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
1478 {
1479 int useful = 0;
1480 ListCell *i;
1481
1482 foreach(i, pathkeys)
1483 {
1484 PathKey *pathkey = (PathKey *) lfirst(i);
1485 bool matched = false;
1486 ListCell *j;
1487
1488 /* If "wrong" direction, not useful for merging */
1489 if (!right_merge_direction(root, pathkey))
1490 break;
1491
1492 /*
1493 * First look into the EquivalenceClass of the pathkey, to see if
1494 * there are any members not yet joined to the rel. If so, it's
1495 * surely possible to generate a mergejoin clause using them.
1496 */
1497 if (rel->has_eclass_joins &&
1498 eclass_useful_for_merging(root, pathkey->pk_eclass, rel))
1499 matched = true;
1500 else
1501 {
1502 /*
1503 * Otherwise search the rel's joininfo list, which contains
1504 * non-EquivalenceClass-derivable join clauses that might
1505 * nonetheless be mergejoinable.
1506 */
1507 foreach(j, rel->joininfo)
1508 {
1509 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
1510
1511 if (restrictinfo->mergeopfamilies == NIL)
1512 continue;
1513 update_mergeclause_eclasses(root, restrictinfo);
1514
1515 if (pathkey->pk_eclass == restrictinfo->left_ec ||
1516 pathkey->pk_eclass == restrictinfo->right_ec)
1517 {
1518 matched = true;
1519 break;
1520 }
1521 }
1522 }
1523
1524 /*
1525 * If we didn't find a mergeclause, we're done --- any additional
1526 * sort-key positions in the pathkeys are useless. (But we can still
1527 * mergejoin if we found at least one mergeclause.)
1528 */
1529 if (matched)
1530 useful++;
1531 else
1532 break;
1533 }
1534
1535 return useful;
1536 }
1537
1538 /*
1539 * right_merge_direction
1540 * Check whether the pathkey embodies the preferred sort direction
1541 * for merging its target column.
1542 */
1543 static bool
right_merge_direction(PlannerInfo * root,PathKey * pathkey)1544 right_merge_direction(PlannerInfo *root, PathKey *pathkey)
1545 {
1546 ListCell *l;
1547
1548 foreach(l, root->query_pathkeys)
1549 {
1550 PathKey *query_pathkey = (PathKey *) lfirst(l);
1551
1552 if (pathkey->pk_eclass == query_pathkey->pk_eclass &&
1553 pathkey->pk_opfamily == query_pathkey->pk_opfamily)
1554 {
1555 /*
1556 * Found a matching query sort column. Prefer this pathkey's
1557 * direction iff it matches. Note that we ignore pk_nulls_first,
1558 * which means that a sort might be needed anyway ... but we still
1559 * want to prefer only one of the two possible directions, and we
1560 * might as well use this one.
1561 */
1562 return (pathkey->pk_strategy == query_pathkey->pk_strategy);
1563 }
1564 }
1565
1566 /* If no matching ORDER BY request, prefer the ASC direction */
1567 return (pathkey->pk_strategy == BTLessStrategyNumber);
1568 }
1569
1570 /*
1571 * pathkeys_useful_for_ordering
1572 * Count the number of pathkeys that are useful for meeting the
1573 * query's requested output ordering.
1574 *
1575 * Unlike merge pathkeys, this is an all-or-nothing affair: it does us
1576 * no good to order by just the first key(s) of the requested ordering.
1577 * So the result is always either 0 or list_length(root->query_pathkeys).
1578 */
1579 static int
pathkeys_useful_for_ordering(PlannerInfo * root,List * pathkeys)1580 pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
1581 {
1582 if (root->query_pathkeys == NIL)
1583 return 0; /* no special ordering requested */
1584
1585 if (pathkeys == NIL)
1586 return 0; /* unordered path */
1587
1588 if (pathkeys_contained_in(root->query_pathkeys, pathkeys))
1589 {
1590 /* It's useful ... or at least the first N keys are */
1591 return list_length(root->query_pathkeys);
1592 }
1593
1594 return 0; /* path ordering not useful */
1595 }
1596
1597 /*
1598 * truncate_useless_pathkeys
1599 * Shorten the given pathkey list to just the useful pathkeys.
1600 */
1601 List *
truncate_useless_pathkeys(PlannerInfo * root,RelOptInfo * rel,List * pathkeys)1602 truncate_useless_pathkeys(PlannerInfo *root,
1603 RelOptInfo *rel,
1604 List *pathkeys)
1605 {
1606 int nuseful;
1607 int nuseful2;
1608
1609 nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
1610 nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
1611 if (nuseful2 > nuseful)
1612 nuseful = nuseful2;
1613
1614 /*
1615 * Note: not safe to modify input list destructively, but we can avoid
1616 * copying the list if we're not actually going to change it
1617 */
1618 if (nuseful == 0)
1619 return NIL;
1620 else if (nuseful == list_length(pathkeys))
1621 return pathkeys;
1622 else
1623 return list_truncate(list_copy(pathkeys), nuseful);
1624 }
1625
1626 /*
1627 * has_useful_pathkeys
1628 * Detect whether the specified rel could have any pathkeys that are
1629 * useful according to truncate_useless_pathkeys().
1630 *
1631 * This is a cheap test that lets us skip building pathkeys at all in very
1632 * simple queries. It's OK to err in the direction of returning "true" when
1633 * there really aren't any usable pathkeys, but erring in the other direction
1634 * is bad --- so keep this in sync with the routines above!
1635 *
1636 * We could make the test more complex, for example checking to see if any of
1637 * the joinclauses are really mergejoinable, but that likely wouldn't win
1638 * often enough to repay the extra cycles. Queries with neither a join nor
1639 * a sort are reasonably common, though, so this much work seems worthwhile.
1640 */
1641 bool
has_useful_pathkeys(PlannerInfo * root,RelOptInfo * rel)1642 has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
1643 {
1644 if (rel->joininfo != NIL || rel->has_eclass_joins)
1645 return true; /* might be able to use pathkeys for merging */
1646 if (root->query_pathkeys != NIL)
1647 return true; /* might be able to use them for ordering */
1648 return false; /* definitely useless */
1649 }
1650