1 /*-------------------------------------------------------------------------
2 *
3 * shard_pruning.c
4 * Shard pruning related code.
5 *
6 * The goal of shard pruning is to find a minimal (super)set of shards that
7 * need to be queried to find rows matching the expression in a query.
8 *
9 * In PruneShards we first make a compact representation of the given
10 * query logical tree. This tree represents boolean operators and its
11 * associated valid constraints (expression nodes) and whether boolean
12 * operator has associated unknown constraints. This allows essentially
13 * unknown constraints to be replaced by a simple placeholder flag.
14 *
15 * For example query: WHERE (hash_col IN (1,2)) AND (other_col=1 OR other_col=2)
16 * Gets transformed by steps:
17 * 1. AND(hash_col IN (1,2), OR(X, X))
18 * 2. AND(hash_col IN (1,2), OR(X))
19 * 3. AND(hash_col IN (1,2), X)
20 * Where X represents any set of unrecognized unprunable constraint(s).
21 *
22 * Above allows the following pruning machinery to understand that
23 * the target shard is determined solely by constraint: hash_col IN (1,2).
24 * Here it does not matter what X is as its ANDed by a valid constraint.
25 * Pruning machinery will fail, returning all shards, if it encounters
26 * eg. OR(hash_col=1, X) as this condition does not limit the target shards.
27 *
28 * PruneShards secondly computes a simplified disjunctive normal form (DNF)
29 * of the logical tree as a list of pruning instances. Each pruning instance
30 * contains all AND-ed constraints on the partition column. An OR expression
31 * will result in two or more new pruning instances being added for the
32 * subexpressions. The "parent" instance is marked isPartial and ignored
33 * during pruning.
34 *
35 * We use the distributive property for constraints of the form P AND (Q OR R)
36 * to rewrite it to (P AND Q) OR (P AND R) by copying constraints from parent
37 * to "child" pruning instances. However, we do not distribute nested
38 * expressions. While (P OR Q) AND (R OR S) is logically equivalent to (P AND
39 * R) OR (P AND S) OR (Q AND R) OR (Q AND S), in our implementation it becomes
40 * P OR Q OR R OR S. This is acceptable since this will always result in a
41 * superset of shards. If this proves to be a issue in practice, a more
42 * complete algorithm could be implemented.
43 *
44 * We then evaluate each non-partial pruning instance in the disjunction
45 * through the following, increasingly expensive, steps:
46 *
47 * 1) If there is a constant equality constraint on the partition column, and
48 * no overlapping shards exist, find the shard interval in which the
49 * constant falls
50 *
51 * 2) If there is a hash range constraint on the partition column, find the
52 * shard interval matching the range
53 *
54 * 3) If there are range constraints (e.g. (a > 0 AND a < 10)) on the
55 * partition column, find the shard intervals that overlap with the range
56 *
57 * 4) If there are overlapping shards, exhaustively search all shards that are
58 * not excluded by constraints
59 *
60 * Finally, the union of the shards found by each pruning instance is
61 * returned.
62 *
63 * Copyright (c) Citus Data, Inc.
64 *
65 *-------------------------------------------------------------------------
66 */
67 #include "postgres.h"
68
69 #include "distributed/pg_version_constants.h"
70
71 #include "fmgr.h"
72
73 #include "distributed/shard_pruning.h"
74
75 #include "access/nbtree.h"
76 #include "catalog/pg_am.h"
77 #include "catalog/pg_collation.h"
78 #include "catalog/pg_type.h"
79 #include "distributed/distributed_planner.h"
80 #include "distributed/listutils.h"
81 #include "distributed/log_utils.h"
82 #include "distributed/metadata_cache.h"
83 #include "distributed/multi_join_order.h"
84 #include "distributed/multi_physical_planner.h"
85 #include "distributed/pg_dist_partition.h"
86 #include "distributed/shardinterval_utils.h"
87 #include "distributed/version_compat.h"
88 #include "distributed/worker_protocol.h"
89 #include "nodes/nodeFuncs.h"
90 #include "nodes/makefuncs.h"
91 #include "optimizer/clauses.h"
92 #include "optimizer/planner.h"
93 #include "parser/parse_coerce.h"
94 #include "utils/arrayaccess.h"
95 #include "utils/catcache.h"
96 #include "utils/fmgrprotos.h"
97 #include "utils/lsyscache.h"
98 #include "utils/memutils.h"
99 #include "utils/ruleutils.h"
100
101
102 /*
103 * Tree node for compact representation of the given query logical tree.
104 * Represent a single boolean operator node and its associated
105 * valid constraints (expression nodes) and invalid constraint flag.
106 */
107 typedef struct PruningTreeNode
108 {
109 /* Indicates is this AND/OR boolean operator */
110 BoolExprType boolop;
111
112 /* Does this boolean operator have unknown/unprunable constraint(s) */
113 bool hasInvalidConstraints;
114
115 /* List of recognized valid prunable constraints of this boolean opearator */
116 List *validConstraints;
117
118 /* Child boolean producing operators. Parents are always different from their children */
119 List *childBooleanNodes;
120 } PruningTreeNode;
121
122 /*
123 * Context used for expression_tree_walker
124 */
125 typedef struct PruningTreeBuildContext
126 {
127 Var *partitionColumn;
128 PruningTreeNode *current;
129 } PruningTreeBuildContext;
130
131 /*
132 * A pruning instance is a set of ANDed constraints on a partition key.
133 */
134 typedef struct PruningInstance
135 {
136 /* Does this instance contain any prunable expressions? */
137 bool hasValidConstraint;
138
139 /*
140 * This constraint never evaluates to true, i.e. pruning does not have to
141 * be performed.
142 */
143 bool evaluatesToFalse;
144
145 /*
146 * Constraints on the partition column value. If multiple values are
147 * found the more restrictive one should be stored here. Even for
148 * a hash-partitioned table, actual column-values are stored here, *not*
149 * hashed values.
150 */
151 Const *lessConsts;
152 Const *lessEqualConsts;
153 Const *equalConsts;
154 Const *greaterEqualConsts;
155 Const *greaterConsts;
156
157 /*
158 * Constraint using a pre-hashed column value. The constant will store the
159 * hashed value, not the original value of the restriction.
160 */
161 Const *hashedEqualConsts;
162
163 /*
164 * Has this PruningInstance been added to
165 * ClauseWalkerContext->pruningInstances? This is not done immediately,
166 * but the first time a constraint (independent of us being able to handle
167 * that constraint) is found.
168 */
169 bool addedToPruningInstances;
170
171 /*
172 * When OR clauses are found, the non-ORed part (think of a < 3 AND (a > 5
173 * OR a > 7)) of the expression is stored in one PruningInstance which is
174 * then copied for the ORed expressions. The original is marked as
175 * isPartial, to avoid being used for pruning.
176 */
177 bool isPartial;
178 } PruningInstance;
179
180
181 /*
182 * Partial instances that need to be finished building. This is used to
183 * collect all ANDed restrictions, before looking into ORed expressions.
184 */
185 typedef struct PendingPruningInstance
186 {
187 PruningInstance *instance;
188 PruningTreeNode *continueAt;
189 } PendingPruningInstance;
190
191 typedef union \
192 { \
193 FunctionCallInfoBaseData fcinfo; \
194 /* ensure enough space for nargs args is available */ \
195 char fcinfo_data[SizeForFunctionCallInfo(2)]; \
196 } FunctionCall2InfoData;
197
198 /*
199 * We also ignore this warning in ./configure, but that's not always enough.
200 * The flags that are used during compilation by ./configure are determined by
201 * the compiler support it detects. This is usually GCC. This warning is only
202 * present in clang. So it would normally be fine to not use it with GCC. The
203 * problem is that clang is used to compile the JIT bitcode when postgres is
204 * compiled with -with-llvm. So in the end both clang and GCC are used to
205 * compile the project.
206 *
207 * So the flag is not provided on the command line, because ./configure notices
208 * that GCC doesn't support it. But this warning persists when compiling the
209 * bitcode. So that's why we ignore it here explicitly.
210 */
211 #ifdef __clang__
212 #pragma clang diagnostic ignored "-Wgnu-variable-sized-type-not-at-end"
213 #endif /* __clang__ */
214
215 /*
216 * Data necessary to perform a single PruneShards().
217 */
218 typedef struct ClauseWalkerContext
219 {
220 Var *partitionColumn;
221 char partitionMethod;
222
223 /* ORed list of pruning targets */
224 List *pruningInstances;
225
226 /*
227 * Partially built PruningInstances, that need to be completed by doing a
228 * separate PrunableExpressionsWalker() pass.
229 */
230 List *pendingInstances;
231
232 /* PruningInstance currently being built, all eligible constraints are added here */
233 PruningInstance *currentPruningInstance;
234
235 /*
236 * Information about function calls we need to perform. Re-using the same
237 * FunctionCall2InfoData, instead of using FunctionCall2Coll, is often
238 * cheaper.
239 */
240 FunctionCall2InfoData compareValueFunctionCall;
241 FunctionCall2InfoData compareIntervalFunctionCall;
242 } ClauseWalkerContext;
243
244 static bool BuildPruningTree(Node *node, PruningTreeBuildContext *context);
245 static void SimplifyPruningTree(PruningTreeNode *node, PruningTreeNode *parent);
246 static void PrunableExpressions(PruningTreeNode *node, ClauseWalkerContext *context);
247 static void PrunableExpressionsWalker(PruningTreeNode *node,
248 ClauseWalkerContext *context);
249 static bool IsValidPartitionKeyRestriction(OpExpr *opClause);
250 static void AddPartitionKeyRestrictionToInstance(ClauseWalkerContext *context,
251 OpExpr *opClause, Var *varClause,
252 Const *constantClause);
253 static void AddSAOPartitionKeyRestrictionToInstance(ClauseWalkerContext *context,
254 ScalarArrayOpExpr *
255 arrayOperatorExpression);
256 static bool SAORestrictions(ScalarArrayOpExpr *arrayOperatorExpression,
257 Var *partitionColumn,
258 List **requestedRestrictions);
259 static void ErrorTypesDontMatch(Oid firstType, Oid firstCollId, Oid secondType,
260 Oid secondCollId);
261 static bool IsValidHashRestriction(OpExpr *opClause);
262 static void AddHashRestrictionToInstance(ClauseWalkerContext *context, OpExpr *opClause,
263 Var *varClause, Const *constantClause);
264 static void AddNewConjuction(ClauseWalkerContext *context, PruningTreeNode *node);
265 static PruningInstance * CopyPartialPruningInstance(PruningInstance *sourceInstance);
266 static List * ShardArrayToList(ShardInterval **shardArray, int length);
267 static List * DeepCopyShardIntervalList(List *originalShardIntervalList);
268 static int PerformValueCompare(FunctionCallInfo compareFunctionCall, Datum a,
269 Datum b);
270 static int PerformCompare(FunctionCallInfo compareFunctionCall);
271
272 static List * PruneOne(CitusTableCacheEntry *cacheEntry, ClauseWalkerContext *context,
273 PruningInstance *prune);
274 static List * PruneWithBoundaries(CitusTableCacheEntry *cacheEntry,
275 ClauseWalkerContext *context,
276 PruningInstance *prune);
277 static List * ExhaustivePrune(CitusTableCacheEntry *cacheEntry,
278 ClauseWalkerContext *context,
279 PruningInstance *prune);
280 static bool ExhaustivePruneOne(ShardInterval *curInterval,
281 ClauseWalkerContext *context,
282 PruningInstance *prune);
283 static int UpperShardBoundary(Datum partitionColumnValue,
284 ShardInterval **shardIntervalCache,
285 int shardCount, FunctionCallInfo compareFunction,
286 bool includeMin);
287 static int LowerShardBoundary(Datum partitionColumnValue,
288 ShardInterval **shardIntervalCache,
289 int shardCount, FunctionCallInfo compareFunction,
290 bool includeMax);
291 static PruningTreeNode * CreatePruningNode(BoolExprType boolop);
292 static OpExpr * SAORestrictionArrayEqualityOp(ScalarArrayOpExpr *arrayOperatorExpression,
293 Var *partitionColumn);
294 static void DebugLogNode(char *fmt, Node *node, List *deparseCtx);
295 static void DebugLogPruningInstance(PruningInstance *pruning, List *deparseCtx);
296 static int ConstraintCount(PruningTreeNode *node);
297
298
299 /*
300 * PruneShards returns all shards from a distributed table that cannot be
301 * proven to be eliminated by whereClauseList.
302 *
303 * For non-distributed tables such as reference table, the function
304 * simply returns the single shard that the table has.
305 *
306 * When there is a single <partition column> = <constant> filter in the where
307 * clause list, the constant is written to the partitionValueConst pointer.
308 */
309 List *
PruneShards(Oid relationId,Index rangeTableId,List * whereClauseList,Const ** partitionValueConst)310 PruneShards(Oid relationId, Index rangeTableId, List *whereClauseList,
311 Const **partitionValueConst)
312 {
313 CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId);
314 int shardCount = cacheEntry->shardIntervalArrayLength;
315 char partitionMethod = cacheEntry->partitionMethod;
316 ClauseWalkerContext context = { 0 };
317 ListCell *pruneCell;
318 List *prunedList = NIL;
319 bool foundRestriction = false;
320 bool foundPartitionColumnValue = false;
321 Const *singlePartitionValueConst = NULL;
322
323 /* there are no shards to return */
324 if (shardCount == 0)
325 {
326 return NIL;
327 }
328
329 /* always return empty result if WHERE clause is of the form: false (AND ..) */
330 if (ContainsFalseClause(whereClauseList))
331 {
332 return NIL;
333 }
334
335 /* short circuit for non-distributed tables such as reference table */
336 if (IsCitusTableTypeCacheEntry(cacheEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
337 {
338 prunedList = ShardArrayToList(cacheEntry->sortedShardIntervalArray,
339 cacheEntry->shardIntervalArrayLength);
340 return DeepCopyShardIntervalList(prunedList);
341 }
342
343
344 context.partitionMethod = partitionMethod;
345 context.partitionColumn = PartitionColumn(relationId, rangeTableId);
346 context.currentPruningInstance = palloc0(sizeof(PruningInstance));
347
348 if (cacheEntry->shardIntervalCompareFunction)
349 {
350 /* initiate function call info once (allows comparators to cache metadata) */
351 InitFunctionCallInfoData(*(FunctionCallInfo) &
352 context.compareIntervalFunctionCall,
353 cacheEntry->shardIntervalCompareFunction,
354 2, cacheEntry->partitionColumn->varcollid, NULL, NULL);
355 }
356 else
357 {
358 ereport(ERROR, (errmsg("shard pruning not possible without "
359 "a shard interval comparator")));
360 }
361
362 if (cacheEntry->shardColumnCompareFunction)
363 {
364 /* initiate function call info once (allows comparators to cache metadata) */
365 InitFunctionCallInfoData(*(FunctionCallInfo) &
366 context.compareValueFunctionCall,
367 cacheEntry->shardColumnCompareFunction,
368 2, cacheEntry->partitionColumn->varcollid, NULL, NULL);
369 }
370 else
371 {
372 ereport(ERROR, (errmsg("shard pruning not possible without "
373 "a partition column comparator")));
374 }
375
376 PruningTreeNode *tree = CreatePruningNode(AND_EXPR);
377
378 PruningTreeBuildContext treeBuildContext = { 0 };
379 treeBuildContext.current = tree;
380 treeBuildContext.partitionColumn = PartitionColumn(relationId, rangeTableId);
381
382 /* Build logical tree of prunable restrictions and invalid restrictions */
383 BuildPruningTree((Node *) whereClauseList, &treeBuildContext);
384
385 /* Simplify logic tree of prunable restrictions */
386 SimplifyPruningTree(tree, NULL);
387
388 /* Figure out what we can prune on */
389 PrunableExpressions(tree, &context);
390
391 List *debugLoggedPruningInstances = NIL;
392
393 /*
394 * Prune using each of the PrunableInstances we found, and OR results
395 * together.
396 */
397 foreach(pruneCell, context.pruningInstances)
398 {
399 PruningInstance *prune = (PruningInstance *) lfirst(pruneCell);
400
401 /*
402 * If this is a partial instance, a fully built one has also been
403 * added. Skip.
404 */
405 if (prune->isPartial)
406 {
407 continue;
408 }
409
410 /*
411 * If the current instance has no prunable expressions, we'll have to
412 * return all shards. No point in continuing pruning in that case.
413 */
414 if (!prune->hasValidConstraint)
415 {
416 foundRestriction = false;
417 break;
418 }
419
420 if (context.partitionMethod == DISTRIBUTE_BY_HASH)
421 {
422 if (!prune->evaluatesToFalse && !prune->equalConsts &&
423 !prune->hashedEqualConsts)
424 {
425 /* if hash-partitioned and no equals constraints, return all shards */
426 foundRestriction = false;
427 break;
428 }
429 else if (partitionValueConst != NULL && prune->equalConsts != NULL)
430 {
431 if (!foundPartitionColumnValue)
432 {
433 /* remember the partition column value */
434 singlePartitionValueConst = prune->equalConsts;
435 foundPartitionColumnValue = true;
436 }
437 else if (singlePartitionValueConst == NULL)
438 {
439 /* already found multiple partition column values */
440 }
441 else if (!equal(prune->equalConsts, singlePartitionValueConst))
442 {
443 /* found multiple partition column values */
444 singlePartitionValueConst = NULL;
445 }
446 }
447 }
448
449 List *pruneOneList = PruneOne(cacheEntry, &context, prune);
450
451 if (prunedList)
452 {
453 /*
454 * We can use list_union_ptr, which is a lot faster than doing
455 * comparing shards by value, because all the ShardIntervals are
456 * guaranteed to be from
457 * CitusTableCacheEntry->sortedShardIntervalArray (thus having the
458 * same pointer values).
459 */
460 prunedList = list_union_ptr(prunedList, pruneOneList);
461 }
462 else
463 {
464 prunedList = pruneOneList;
465 }
466 foundRestriction = true;
467
468 if (IsLoggableLevel(DEBUG3) && pruneOneList)
469 {
470 debugLoggedPruningInstances = lappend(debugLoggedPruningInstances, prune);
471 }
472 }
473
474 /* found no valid restriction, build list of all shards */
475 if (!foundRestriction)
476 {
477 prunedList = ShardArrayToList(cacheEntry->sortedShardIntervalArray,
478 cacheEntry->shardIntervalArrayLength);
479 }
480
481 if (IsLoggableLevel(DEBUG3))
482 {
483 char *relationName = get_rel_name(relationId);
484 if (foundRestriction && debugLoggedPruningInstances != NIL)
485 {
486 List *deparseCtx = deparse_context_for("unknown", relationId);
487 foreach(pruneCell, debugLoggedPruningInstances)
488 {
489 PruningInstance *prune = (PruningInstance *) lfirst(pruneCell);
490 DebugLogPruningInstance(prune, deparseCtx);
491 }
492 }
493 else
494 {
495 ereport(DEBUG3, (errmsg("no sharding pruning constraints on %s found",
496 relationName)));
497 }
498
499 ereport(DEBUG3, (errmsg("shard count after pruning for %s: %d", relationName,
500 list_length(prunedList))));
501 }
502
503 /* if requested, copy the partition value constant */
504 if (partitionValueConst != NULL)
505 {
506 if (singlePartitionValueConst != NULL)
507 {
508 *partitionValueConst = copyObject(singlePartitionValueConst);
509 }
510 else
511 {
512 *partitionValueConst = NULL;
513 }
514 }
515
516 /*
517 * Deep copy list, so it's independent of the CitusTableCacheEntry
518 * contents.
519 */
520 return DeepCopyShardIntervalList(prunedList);
521 }
522
523
524 /*
525 * IsValidConditionNode checks whether node is a valid constraint for pruning.
526 */
527 static bool
IsValidConditionNode(Node * node,Var * partitionColumn)528 IsValidConditionNode(Node *node, Var *partitionColumn)
529 {
530 if (IsA(node, OpExpr))
531 {
532 OpExpr *opClause = (OpExpr *) node;
533 Var *varClause = NULL;
534 if (VarConstOpExprClause(opClause, &varClause, NULL))
535 {
536 if (equal(varClause, partitionColumn))
537 {
538 return IsValidPartitionKeyRestriction(opClause);
539 }
540 else if (varClause->varattno == RESERVED_HASHED_COLUMN_ID)
541 {
542 return IsValidHashRestriction(opClause);
543 }
544 }
545
546 return false;
547 }
548 else if (IsA(node, ScalarArrayOpExpr))
549 {
550 ScalarArrayOpExpr *arrayOperatorExpression = (ScalarArrayOpExpr *) node;
551 return SAORestrictions(arrayOperatorExpression, partitionColumn, NULL);
552 }
553 else
554 {
555 return false;
556 }
557 }
558
559
560 /*
561 * BuildPruningTree builds a logical tree of constraints for pruning.
562 */
563 static bool
BuildPruningTree(Node * node,PruningTreeBuildContext * context)564 BuildPruningTree(Node *node, PruningTreeBuildContext *context)
565 {
566 if (node == NULL)
567 {
568 return false;
569 }
570
571 if (IsA(node, List))
572 {
573 return expression_tree_walker(node, BuildPruningTree, context);
574 }
575 else if (IsA(node, BoolExpr))
576 {
577 BoolExpr *boolExpr = (BoolExpr *) node;
578
579 if (boolExpr->boolop == NOT_EXPR)
580 {
581 /*
582 * With Var-Const conditions we should not encounter NOT_EXPR nodes.
583 * Postgres standard planner applies De Morgan's laws to remove them.
584 * We still encounter them with subqueries inside NOT, for example with:
585 * WHERE id NOT IN (SELECT id FROM something).
586 * We treat these as invalid constraints for pruning when we encounter them.
587 */
588 context->current->hasInvalidConstraints = true;
589
590 return false;
591 }
592 else if (context->current->boolop != boolExpr->boolop)
593 {
594 PruningTreeNode *child = CreatePruningNode(boolExpr->boolop);
595
596 context->current->childBooleanNodes = lappend(
597 context->current->childBooleanNodes, child);
598
599 PruningTreeBuildContext newContext = { 0 };
600 newContext.partitionColumn = context->partitionColumn;
601 newContext.current = child;
602
603 return expression_tree_walker((Node *) boolExpr->args,
604 BuildPruningTree, &newContext);
605 }
606 else
607 {
608 return expression_tree_walker(node, BuildPruningTree, context);
609 }
610 }
611 else if (IsValidConditionNode(node, context->partitionColumn))
612 {
613 context->current->validConstraints = lappend(context->current->validConstraints,
614 node);
615
616 return false;
617 }
618 else
619 {
620 context->current->hasInvalidConstraints = true;
621
622 return false;
623 }
624 }
625
626
627 /*
628 * SimplifyPruningTree reduces logical tree of valid and invalid constraints for pruning.
629 * The goal is to remove any node having just a single constraint associated with it.
630 * This constraint is assigned to the parent logical node.
631 *
632 * For example 'AND(hash_col = 1, OR(X))' gets simplified to 'AND(hash_col = 1, X)',
633 * where X is any unknown condition.
634 */
635 static void
SimplifyPruningTree(PruningTreeNode * node,PruningTreeNode * parent)636 SimplifyPruningTree(PruningTreeNode *node, PruningTreeNode *parent)
637 {
638 /* Copy list of children as its mutated inside the loop */
639 List *childBooleanNodes = list_copy(node->childBooleanNodes);
640
641 ListCell *cell;
642 foreach(cell, childBooleanNodes)
643 {
644 PruningTreeNode *child = (PruningTreeNode *) lfirst(cell);
645 SimplifyPruningTree(child, node);
646 }
647
648 if (!parent)
649 {
650 /* Root is always ANDed expressions */
651 Assert(node->boolop == AND_EXPR);
652 return;
653 }
654
655 /* Boolean operator with single (recognized/unknown) constraint gets simplified */
656 if (ConstraintCount(node) <= 1)
657 {
658 Assert(node->childBooleanNodes == NIL);
659 parent->validConstraints = list_concat(parent->validConstraints,
660 node->validConstraints);
661 parent->hasInvalidConstraints = parent->hasInvalidConstraints ||
662 node->hasInvalidConstraints;
663
664 /* Remove current node from parent. Its constraint was assigned to the parent above */
665 parent->childBooleanNodes = list_delete_ptr(parent->childBooleanNodes, node);
666 }
667 }
668
669
670 /*
671 * ContainsFalseClause returns whether the flattened where clause list
672 * contains false as a clause.
673 */
674 bool
ContainsFalseClause(List * whereClauseList)675 ContainsFalseClause(List *whereClauseList)
676 {
677 bool containsFalseClause = false;
678 ListCell *clauseCell = NULL;
679
680 foreach(clauseCell, whereClauseList)
681 {
682 Node *clause = (Node *) lfirst(clauseCell);
683
684 if (IsA(clause, Const))
685 {
686 Const *constant = (Const *) clause;
687 if (constant->consttype == BOOLOID && !DatumGetBool(constant->constvalue))
688 {
689 containsFalseClause = true;
690 break;
691 }
692 }
693 }
694
695 return containsFalseClause;
696 }
697
698
699 /*
700 * PrunableExpressions builds a list of all prunable expressions in node,
701 * storing them in context->pruningInstances.
702 */
703 static void
PrunableExpressions(PruningTreeNode * tree,ClauseWalkerContext * context)704 PrunableExpressions(PruningTreeNode *tree, ClauseWalkerContext *context)
705 {
706 /*
707 * Build initial list of prunable expressions. As long as only,
708 * implicitly or explicitly, ANDed expressions are found, this perform a
709 * depth-first search. When an ORed expression is found, the current
710 * PruningInstance is added to context->pruningInstances (once for each
711 * ORed expression), then the tree-traversal is continued without
712 * recursing. Once at the top-level again, we'll process all pending
713 * expressions - that allows us to find all ANDed expressions, before
714 * recursing into an ORed expression.
715 */
716 PrunableExpressionsWalker(tree, context);
717
718 /*
719 * Process all pending instances. While processing, new ones might be
720 * added to the list, so don't use foreach().
721 *
722 * Check the places in PruningInstanceWalker that push onto
723 * context->pendingInstances why construction of the PruningInstance might
724 * be pending.
725 *
726 * We copy the partial PruningInstance, and continue adding information by
727 * calling PrunableExpressionsWalker() on the copy, continuing at the
728 * node stored in PendingPruningInstance->continueAt.
729 */
730 while (context->pendingInstances != NIL)
731 {
732 PendingPruningInstance *instance =
733 (PendingPruningInstance *) linitial(context->pendingInstances);
734 PruningInstance *newPrune = CopyPartialPruningInstance(instance->instance);
735
736 context->pendingInstances = list_delete_first(context->pendingInstances);
737
738 context->currentPruningInstance = newPrune;
739 PrunableExpressionsWalker(instance->continueAt, context);
740 context->currentPruningInstance = NULL;
741 }
742 }
743
744
745 /*
746 * PrunableExpressionsWalker() is the main work horse for
747 * PrunableExpressions().
748 */
749 static void
PrunableExpressionsWalker(PruningTreeNode * node,ClauseWalkerContext * context)750 PrunableExpressionsWalker(PruningTreeNode *node, ClauseWalkerContext *context)
751 {
752 ListCell *cell = NULL;
753
754 if (node == NULL)
755 {
756 return;
757 }
758
759 if (node->boolop == OR_EXPR)
760 {
761 /*
762 * "Queue" partial pruning instances. This is used to convert
763 * expressions like (A AND (B OR C) AND D) into (A AND B AND D),
764 * (A AND C AND D), with A, B, C, D being restrictions. When the
765 * OR is encountered, a reference to the partially built
766 * PruningInstance (containing A at this point), is added to
767 * context->pendingInstances once for B and once for C. Once a
768 * full tree-walk completed, PrunableExpressions() will complete
769 * the pending instances, which'll now also know about restriction
770 * D, by calling PrunableExpressionsWalker() once for B and once
771 * for C.
772 */
773
774 if (node->hasInvalidConstraints)
775 {
776 PruningTreeNode *child = CreatePruningNode(AND_EXPR);
777 child->hasInvalidConstraints = true;
778
779 AddNewConjuction(context, child);
780 }
781
782 foreach(cell, node->validConstraints)
783 {
784 Node *constraint = (Node *) lfirst(cell);
785
786 PruningTreeNode *child = CreatePruningNode(AND_EXPR);
787 child->validConstraints = list_make1(constraint);
788
789 AddNewConjuction(context, child);
790 }
791
792 foreach(cell, node->childBooleanNodes)
793 {
794 PruningTreeNode *child = (PruningTreeNode *) lfirst(cell);
795 Assert(child->boolop == AND_EXPR);
796 AddNewConjuction(context, child);
797 }
798
799 return;
800 }
801
802 Assert(node->boolop == AND_EXPR);
803
804 foreach(cell, node->validConstraints)
805 {
806 Node *constraint = (Node *) lfirst(cell);
807
808 if (IsA(constraint, OpExpr))
809 {
810 OpExpr *opClause = (OpExpr *) constraint;
811 PruningInstance *prune = context->currentPruningInstance;
812 Var *varClause = NULL;
813 Const *constantClause = NULL;
814
815 if (!prune->addedToPruningInstances)
816 {
817 context->pruningInstances = lappend(context->pruningInstances, prune);
818 prune->addedToPruningInstances = true;
819 }
820
821 if (VarConstOpExprClause(opClause, &varClause, &constantClause))
822 {
823 if (equal(varClause, context->partitionColumn))
824 {
825 /*
826 * Found a restriction on the partition column itself. Update the
827 * current constraint with the new information.
828 */
829 AddPartitionKeyRestrictionToInstance(context, opClause, varClause,
830 constantClause);
831 }
832 else if (varClause->varattno == RESERVED_HASHED_COLUMN_ID)
833 {
834 /*
835 * Found restriction that directly specifies the boundaries of a
836 * hashed column.
837 */
838 AddHashRestrictionToInstance(context, opClause, varClause,
839 constantClause);
840 }
841 else
842 {
843 /* We encounter here only valid constraints */
844 Assert(false);
845 }
846 }
847 else
848 {
849 /* We encounter here only valid constraints */
850 Assert(false);
851 }
852 }
853 else if (IsA(constraint, ScalarArrayOpExpr))
854 {
855 ScalarArrayOpExpr *arrayOperatorExpression = (ScalarArrayOpExpr *) constraint;
856 AddSAOPartitionKeyRestrictionToInstance(context, arrayOperatorExpression);
857 }
858 else
859 {
860 /* We encounter here only valid constraints */
861 Assert(false);
862 }
863 }
864
865 if (node->hasInvalidConstraints)
866 {
867 PruningInstance *prune = context->currentPruningInstance;
868
869 /*
870 * Mark unknown expression as added, so we'll fail pruning if there's no ANDed
871 * restrictions that we know how to deal with.
872 */
873 if (!prune->addedToPruningInstances)
874 {
875 context->pruningInstances = lappend(context->pruningInstances, prune);
876 prune->addedToPruningInstances = true;
877 }
878 }
879
880 foreach(cell, node->childBooleanNodes)
881 {
882 PruningTreeNode *child = (PruningTreeNode *) lfirst(cell);
883 Assert(child->boolop == OR_EXPR);
884 PrunableExpressionsWalker(child, context);
885 }
886 }
887
888
889 /*
890 * VarConstOpExprClause check whether an expression is a valid comparison of a Var to a Const.
891 * Also obtaining the var with constant when valid.
892 */
893 bool
VarConstOpExprClause(OpExpr * opClause,Var ** varClause,Const ** constantClause)894 VarConstOpExprClause(OpExpr *opClause, Var **varClause, Const **constantClause)
895 {
896 Var *foundVarClause = NULL;
897 Const *foundConstantClause = NULL;
898
899 Node *leftOperand;
900 Node *rightOperand;
901 if (!BinaryOpExpression((Expr *) opClause, &leftOperand, &rightOperand))
902 {
903 return false;
904 }
905
906 if (IsA(rightOperand, Const) && IsA(leftOperand, Var))
907 {
908 foundVarClause = (Var *) leftOperand;
909 foundConstantClause = (Const *) rightOperand;
910 }
911 else if (IsA(leftOperand, Const) && IsA(rightOperand, Var))
912 {
913 foundVarClause = (Var *) rightOperand;
914 foundConstantClause = (Const *) leftOperand;
915 }
916 else
917 {
918 return false;
919 }
920
921 if (varClause)
922 {
923 *varClause = foundVarClause;
924 }
925 if (constantClause)
926 {
927 *constantClause = foundConstantClause;
928 }
929 return true;
930 }
931
932
933 /*
934 * AddSAOPartitionKeyRestrictionToInstance adds partcol = arrayelem operator
935 * restriction to the current pruning instance for each element of the array. These
936 * restrictions are added to pruning instance to prune shards based on IN/=ANY
937 * constraints.
938 */
939 static void
AddSAOPartitionKeyRestrictionToInstance(ClauseWalkerContext * context,ScalarArrayOpExpr * arrayOperatorExpression)940 AddSAOPartitionKeyRestrictionToInstance(ClauseWalkerContext *context,
941 ScalarArrayOpExpr *arrayOperatorExpression)
942 {
943 List *restrictions = NULL;
944 bool validSAORestriction PG_USED_FOR_ASSERTS_ONLY =
945 SAORestrictions(arrayOperatorExpression, context->partitionColumn, &restrictions);
946
947 Assert(validSAORestriction);
948
949 PruningTreeNode *node = CreatePruningNode(OR_EXPR);
950 node->validConstraints = restrictions;
951 AddNewConjuction(context, node);
952 }
953
954
955 /*
956 * SAORestrictions checks whether an SAO constraint is valid.
957 * Also obtains equality restrictions.
958 */
959 static bool
SAORestrictions(ScalarArrayOpExpr * arrayOperatorExpression,Var * partitionColumn,List ** requestedRestrictions)960 SAORestrictions(ScalarArrayOpExpr *arrayOperatorExpression, Var *partitionColumn,
961 List **requestedRestrictions)
962 {
963 Node *leftOpExpression = linitial(arrayOperatorExpression->args);
964 Node *strippedLeftOpExpression = strip_implicit_coercions(leftOpExpression);
965 bool usingEqualityOperator = OperatorImplementsEquality(
966 arrayOperatorExpression->opno);
967 Expr *arrayArgument = (Expr *) lsecond(arrayOperatorExpression->args);
968
969 /* checking for partcol = ANY(const, value, s); or partcol IN (const,b,c); */
970 if (usingEqualityOperator && strippedLeftOpExpression != NULL &&
971 equal(strippedLeftOpExpression, partitionColumn) &&
972 IsA(arrayArgument, Const))
973 {
974 Const *arrayConst = (Const *) arrayArgument;
975 int16 typlen = 0;
976 bool typbyval = false;
977 char typalign = '\0';
978 Datum arrayElement = 0;
979 Datum inArray = arrayConst->constvalue;
980 bool isNull = false;
981 bool foundValid = false;
982
983 /* check for the NULL right-hand expression*/
984 if (inArray == 0)
985 {
986 return false;
987 }
988
989 ArrayType *array = DatumGetArrayTypeP(arrayConst->constvalue);
990
991 /* get the necessary information from array type to iterate over it */
992 Oid elementType = ARR_ELEMTYPE(array);
993 get_typlenbyvalalign(elementType,
994 &typlen,
995 &typbyval,
996 &typalign);
997
998 /* Iterate over the righthand array of expression */
999 ArrayIterator arrayIterator = array_create_iterator(array, 0, NULL);
1000 while (array_iterate(arrayIterator, &arrayElement, &isNull))
1001 {
1002 if (isNull)
1003 {
1004 /*
1005 * We can ignore IN (NULL) clauses because a value is never
1006 * equal to NULL.
1007 */
1008 continue;
1009 }
1010
1011 foundValid = true;
1012
1013 if (requestedRestrictions)
1014 {
1015 Const *constElement = makeConst(elementType, -1,
1016 arrayConst->constcollid,
1017 typlen, arrayElement,
1018 isNull, typbyval);
1019
1020 /* build partcol = arrayelem operator */
1021 OpExpr *arrayEqualityOp = SAORestrictionArrayEqualityOp(
1022 arrayOperatorExpression,
1023 partitionColumn);
1024 arrayEqualityOp->args = list_make2(strippedLeftOpExpression,
1025 constElement);
1026
1027 *requestedRestrictions = lappend(*requestedRestrictions, arrayEqualityOp);
1028 }
1029 else
1030 {
1031 break;
1032 }
1033 }
1034
1035 return foundValid;
1036 }
1037
1038 return false;
1039 }
1040
1041
1042 /*
1043 * AddNewConjuction adds the OpExpr to pending instance list of context
1044 * as conjunction as partial instance.
1045 */
1046 static void
AddNewConjuction(ClauseWalkerContext * context,PruningTreeNode * node)1047 AddNewConjuction(ClauseWalkerContext *context, PruningTreeNode *node)
1048 {
1049 PendingPruningInstance *instance = palloc0(sizeof(PendingPruningInstance));
1050
1051 instance->instance = context->currentPruningInstance;
1052 instance->continueAt = node;
1053
1054 /*
1055 * Signal that this instance is not to be used for pruning on
1056 * its own. Once the pending instance is processed, it'll be
1057 * used.
1058 */
1059 instance->instance->isPartial = true;
1060 context->pendingInstances = lappend(context->pendingInstances, instance);
1061 }
1062
1063
1064 /*
1065 * IsValidPartitionKeyRestriction check whether an operator clause is
1066 * a valid restriction for comparing to a partition column.
1067 */
1068 static bool
IsValidPartitionKeyRestriction(OpExpr * opClause)1069 IsValidPartitionKeyRestriction(OpExpr *opClause)
1070 {
1071 ListCell *btreeInterpretationCell = NULL;
1072 bool matchedOp = false;
1073
1074 List *btreeInterpretationList =
1075 get_op_btree_interpretation(opClause->opno);
1076 foreach(btreeInterpretationCell, btreeInterpretationList)
1077 {
1078 OpBtreeInterpretation *btreeInterpretation =
1079 (OpBtreeInterpretation *) lfirst(btreeInterpretationCell);
1080
1081 if (btreeInterpretation->strategy == ROWCOMPARE_NE)
1082 {
1083 /* TODO: could add support for this, if we feel like it */
1084 return false;
1085 }
1086
1087 matchedOp = true;
1088 }
1089
1090 return matchedOp;
1091 }
1092
1093
1094 /*
1095 * AddPartitionKeyRestrictionToInstance adds information about a PartitionKey
1096 * $op Const restriction to the current pruning instance.
1097 */
1098 static void
AddPartitionKeyRestrictionToInstance(ClauseWalkerContext * context,OpExpr * opClause,Var * partitionColumn,Const * constantClause)1099 AddPartitionKeyRestrictionToInstance(ClauseWalkerContext *context, OpExpr *opClause,
1100 Var *partitionColumn, Const *constantClause)
1101 {
1102 PruningInstance *prune = context->currentPruningInstance;
1103 ListCell *btreeInterpretationCell = NULL;
1104
1105 /* only have extra work to do if const isn't same type as partition column */
1106 if (constantClause->consttype != partitionColumn->vartype)
1107 {
1108 /* we want our restriction value in terms of the type of the partition column */
1109 constantClause = TransformPartitionRestrictionValue(partitionColumn,
1110 constantClause, true);
1111 if (constantClause == NULL)
1112 {
1113 /* couldn't coerce value, its invalid restriction */
1114 return;
1115 }
1116 }
1117
1118 if (constantClause->constisnull)
1119 {
1120 /* we cannot do pruning on NULL values */
1121 return;
1122 }
1123
1124 /* at this point, we'd better be able to pass binary Datums to comparison functions */
1125 Assert(IsBinaryCoercible(constantClause->consttype, partitionColumn->vartype));
1126
1127 List *btreeInterpretationList = get_op_btree_interpretation(opClause->opno);
1128 foreach(btreeInterpretationCell, btreeInterpretationList)
1129 {
1130 OpBtreeInterpretation *btreeInterpretation =
1131 (OpBtreeInterpretation *) lfirst(btreeInterpretationCell);
1132
1133 switch (btreeInterpretation->strategy)
1134 {
1135 case BTLessStrategyNumber:
1136 {
1137 if (!prune->lessConsts ||
1138 PerformValueCompare((FunctionCallInfo) &
1139 context->compareValueFunctionCall,
1140 constantClause->constvalue,
1141 prune->lessConsts->constvalue) < 0)
1142 {
1143 prune->lessConsts = constantClause;
1144 }
1145 break;
1146 }
1147
1148 case BTLessEqualStrategyNumber:
1149 {
1150 if (!prune->lessEqualConsts ||
1151 PerformValueCompare((FunctionCallInfo) &
1152 context->compareValueFunctionCall,
1153 constantClause->constvalue,
1154 prune->lessEqualConsts->constvalue) < 0)
1155 {
1156 prune->lessEqualConsts = constantClause;
1157 }
1158 break;
1159 }
1160
1161 case BTEqualStrategyNumber:
1162 {
1163 if (!prune->equalConsts)
1164 {
1165 prune->equalConsts = constantClause;
1166 }
1167 else if (PerformValueCompare((FunctionCallInfo) &
1168 context->compareValueFunctionCall,
1169 constantClause->constvalue,
1170 prune->equalConsts->constvalue) != 0)
1171 {
1172 /* key can't be equal to two values */
1173 prune->evaluatesToFalse = true;
1174 }
1175 break;
1176 }
1177
1178 case BTGreaterEqualStrategyNumber:
1179 {
1180 if (!prune->greaterEqualConsts ||
1181 PerformValueCompare((FunctionCallInfo) &
1182 context->compareValueFunctionCall,
1183 constantClause->constvalue,
1184 prune->greaterEqualConsts->constvalue) > 0
1185 )
1186 {
1187 prune->greaterEqualConsts = constantClause;
1188 }
1189 break;
1190 }
1191
1192 case BTGreaterStrategyNumber:
1193 {
1194 if (!prune->greaterConsts ||
1195 PerformValueCompare((FunctionCallInfo) &
1196 context->compareValueFunctionCall,
1197 constantClause->constvalue,
1198 prune->greaterConsts->constvalue) > 0)
1199 {
1200 prune->greaterConsts = constantClause;
1201 }
1202 break;
1203 }
1204
1205 default:
1206 Assert(false);
1207 }
1208 }
1209
1210 prune->hasValidConstraint = true;
1211 }
1212
1213
1214 /*
1215 * TransformPartitionRestrictionValue works around how PostgreSQL sometimes
1216 * chooses to try to wrap our Var in a coercion rather than the Const.
1217 * To deal with this, we strip coercions from both and manually coerce
1218 * the Const into the type of our partition column.
1219 * It is conceivable that in some instances this may not be possible,
1220 * in those cases we will simply fail to prune partitions based on this clause.
1221 */
1222 Const *
TransformPartitionRestrictionValue(Var * partitionColumn,Const * restrictionValue,bool missingOk)1223 TransformPartitionRestrictionValue(Var *partitionColumn, Const *restrictionValue,
1224 bool missingOk)
1225 {
1226 Node *transformedValue = coerce_to_target_type(NULL, (Node *) restrictionValue,
1227 restrictionValue->consttype,
1228 partitionColumn->vartype,
1229 partitionColumn->vartypmod,
1230 COERCION_ASSIGNMENT,
1231 COERCE_IMPLICIT_CAST, -1);
1232
1233 /* if NULL, no implicit coercion is possible between the types */
1234 if (transformedValue == NULL)
1235 {
1236 if (!missingOk)
1237 {
1238 ErrorTypesDontMatch(partitionColumn->vartype, partitionColumn->varcollid,
1239 restrictionValue->consttype,
1240 restrictionValue->constcollid);
1241 }
1242
1243 return NULL;
1244 }
1245
1246 /* if still not a constant, evaluate coercion */
1247 if (!IsA(transformedValue, Const))
1248 {
1249 transformedValue = (Node *) expression_planner((Expr *) transformedValue);
1250 }
1251
1252 /* if still not a constant, no immutable coercion matched */
1253 if (!IsA(transformedValue, Const))
1254 {
1255 if (!missingOk)
1256 {
1257 ErrorTypesDontMatch(partitionColumn->vartype, partitionColumn->varcollid,
1258 restrictionValue->consttype,
1259 restrictionValue->constcollid);
1260 }
1261
1262 return NULL;
1263 }
1264
1265 return (Const *) transformedValue;
1266 }
1267
1268
1269 /*
1270 * ErrorTypesDontMatch throws an error explicitly printing the type names.
1271 */
1272 static void
ErrorTypesDontMatch(Oid firstType,Oid firstCollId,Oid secondType,Oid secondCollId)1273 ErrorTypesDontMatch(Oid firstType, Oid firstCollId, Oid secondType, Oid secondCollId)
1274 {
1275 Datum firstTypename =
1276 DirectFunctionCall1Coll(regtypeout, firstCollId, ObjectIdGetDatum(firstType));
1277
1278 Datum secondTypename =
1279 DirectFunctionCall1Coll(regtypeout, secondCollId, ObjectIdGetDatum(secondType));
1280
1281 ereport(ERROR, (errmsg("Cannot coerce %s to %s",
1282 DatumGetCString(secondTypename),
1283 DatumGetCString(firstTypename))));
1284 }
1285
1286
1287 /*
1288 * IsValidHashRestriction checks whether an operator clause is a valid restriction for hashed column.
1289 */
1290 static bool
IsValidHashRestriction(OpExpr * opClause)1291 IsValidHashRestriction(OpExpr *opClause)
1292 {
1293 ListCell *btreeInterpretationCell = NULL;
1294
1295 List *btreeInterpretationList =
1296 get_op_btree_interpretation(opClause->opno);
1297 foreach(btreeInterpretationCell, btreeInterpretationList)
1298 {
1299 OpBtreeInterpretation *btreeInterpretation =
1300 (OpBtreeInterpretation *) lfirst(btreeInterpretationCell);
1301
1302 if (btreeInterpretation->strategy == BTGreaterEqualStrategyNumber)
1303 {
1304 return true;
1305 }
1306 }
1307
1308 return false;
1309 }
1310
1311
1312 /*
1313 * AddHashRestrictionToInstance adds information about a
1314 * RESERVED_HASHED_COLUMN_ID = Const restriction to the current pruning
1315 * instance.
1316 */
1317 static void
AddHashRestrictionToInstance(ClauseWalkerContext * context,OpExpr * opClause,Var * varClause,Const * constantClause)1318 AddHashRestrictionToInstance(ClauseWalkerContext *context, OpExpr *opClause,
1319 Var *varClause, Const *constantClause)
1320 {
1321 /* be paranoid */
1322 Assert(IsBinaryCoercible(constantClause->consttype, INT4OID));
1323 Assert(IsValidHashRestriction(opClause));
1324
1325 /*
1326 * Ladidadida, dirty hackety hack. We only add such
1327 * constraints (in ShardIntervalOpExpressions()) to select a
1328 * shard based on its exact boundaries. For efficient binary
1329 * search it's better to simply use one representative value
1330 * to look up the shard. In practice, this is sufficient for
1331 * now.
1332 */
1333 PruningInstance *prune = context->currentPruningInstance;
1334 Assert(!prune->hashedEqualConsts);
1335 prune->hashedEqualConsts = constantClause;
1336 prune->hasValidConstraint = true;
1337 }
1338
1339
1340 /*
1341 * CopyPartialPruningInstance copies a partial PruningInstance, so it can be
1342 * completed.
1343 */
1344 static PruningInstance *
CopyPartialPruningInstance(PruningInstance * sourceInstance)1345 CopyPartialPruningInstance(PruningInstance *sourceInstance)
1346 {
1347 PruningInstance *newInstance = palloc(sizeof(PruningInstance));
1348
1349 Assert(sourceInstance->isPartial);
1350
1351 /*
1352 * To make the new PruningInstance useful for pruning, we have to reset it
1353 * being partial - if necessary it'll be marked so again by
1354 * PrunableExpressionsWalker().
1355 */
1356 *newInstance = *sourceInstance;
1357 newInstance->addedToPruningInstances = false;
1358 newInstance->isPartial = false;
1359
1360 return newInstance;
1361 }
1362
1363
1364 /*
1365 * ShardArrayToList builds a list of out the array of ShardInterval*.
1366 */
1367 static List *
ShardArrayToList(ShardInterval ** shardArray,int length)1368 ShardArrayToList(ShardInterval **shardArray, int length)
1369 {
1370 List *shardIntervalList = NIL;
1371
1372 for (int shardIndex = 0; shardIndex < length; shardIndex++)
1373 {
1374 ShardInterval *shardInterval =
1375 shardArray[shardIndex];
1376 shardIntervalList = lappend(shardIntervalList, shardInterval);
1377 }
1378
1379 return shardIntervalList;
1380 }
1381
1382
1383 /*
1384 * DeepCopyShardIntervalList copies originalShardIntervalList and the
1385 * contained ShardIntervals, into a new list.
1386 */
1387 static List *
DeepCopyShardIntervalList(List * originalShardIntervalList)1388 DeepCopyShardIntervalList(List *originalShardIntervalList)
1389 {
1390 List *copiedShardIntervalList = NIL;
1391
1392 ShardInterval *originalShardInterval = NULL;
1393 foreach_ptr(originalShardInterval, originalShardIntervalList)
1394 {
1395 ShardInterval *copiedShardInterval = CopyShardInterval(originalShardInterval);
1396
1397 copiedShardIntervalList = lappend(copiedShardIntervalList, copiedShardInterval);
1398 }
1399
1400 return copiedShardIntervalList;
1401 }
1402
1403
1404 /*
1405 * PruneOne returns all shards in the table that match a single
1406 * PruningInstance.
1407 */
1408 static List *
PruneOne(CitusTableCacheEntry * cacheEntry,ClauseWalkerContext * context,PruningInstance * prune)1409 PruneOne(CitusTableCacheEntry *cacheEntry, ClauseWalkerContext *context,
1410 PruningInstance *prune)
1411 {
1412 ShardInterval *shardInterval = NULL;
1413
1414 /* Well, if life always were this easy... */
1415 if (prune->evaluatesToFalse)
1416 {
1417 return NIL;
1418 }
1419
1420 /*
1421 * For an equal constraints, if there's no overlapping shards (always the
1422 * case for hash and range partitioning, sometimes for append), can
1423 * perform binary search for the right interval. That's usually the
1424 * fastest, so try that first.
1425 */
1426 if (prune->equalConsts &&
1427 !cacheEntry->hasOverlappingShardInterval)
1428 {
1429 shardInterval = FindShardInterval(prune->equalConsts->constvalue, cacheEntry);
1430
1431 /*
1432 * If pruned down to nothing, we're done. Otherwise see if other
1433 * methods prune down further / to nothing.
1434 */
1435 if (!shardInterval)
1436 {
1437 return NIL;
1438 }
1439 }
1440
1441 /*
1442 * If the hash value we're looking for is known, we can search for the
1443 * interval directly. That's fast and should only ever be the case for a
1444 * hash-partitioned table.
1445 */
1446 if (prune->hashedEqualConsts)
1447 {
1448 ShardInterval **sortedShardIntervalArray = cacheEntry->sortedShardIntervalArray;
1449
1450 Assert(context->partitionMethod == DISTRIBUTE_BY_HASH);
1451
1452 int shardIndex = FindShardIntervalIndex(prune->hashedEqualConsts->constvalue,
1453 cacheEntry);
1454
1455 if (shardIndex == INVALID_SHARD_INDEX)
1456 {
1457 return NIL;
1458 }
1459 else if (shardInterval &&
1460 sortedShardIntervalArray[shardIndex]->shardId != shardInterval->shardId)
1461 {
1462 /*
1463 * equalConst based pruning above yielded a different shard than
1464 * pruning based on pre-hashed equality. This is useful in case
1465 * of INSERT ... SELECT, where both can occur together (one via
1466 * join/colocation, the other via a plain equality restriction).
1467 */
1468 return NIL;
1469 }
1470 else
1471 {
1472 return list_make1(sortedShardIntervalArray[shardIndex]);
1473 }
1474 }
1475
1476 /*
1477 * If previous pruning method yielded a single shard, and the table is not
1478 * hash partitioned, attempt range based pruning to exclude it further.
1479 *
1480 * That's particularly important in particular for subquery pushdown,
1481 * where it's very common to have a user specified equality restriction,
1482 * and a range based restriction for shard boundaries, added by the
1483 * subquery machinery.
1484 */
1485 if (shardInterval)
1486 {
1487 if (context->partitionMethod != DISTRIBUTE_BY_HASH &&
1488 ExhaustivePruneOne(shardInterval, context, prune))
1489 {
1490 return NIL;
1491 }
1492 else
1493 {
1494 /* no chance to prune further, return */
1495 return list_make1(shardInterval);
1496 }
1497 }
1498
1499 /*
1500 * Should never get here for hashing, we've filtered down to either zero
1501 * or one shard, and returned.
1502 */
1503 Assert(context->partitionMethod != DISTRIBUTE_BY_HASH);
1504
1505 /*
1506 * Next method: binary search with fuzzy boundaries. Can't trivially do so
1507 * if shards have overlapping boundaries.
1508 *
1509 * TODO: If we kept shard intervals separately sorted by both upper and
1510 * lower boundaries, this should be possible?
1511 */
1512 if (!cacheEntry->hasOverlappingShardInterval && (
1513 prune->greaterConsts || prune->greaterEqualConsts ||
1514 prune->lessConsts || prune->lessEqualConsts))
1515 {
1516 return PruneWithBoundaries(cacheEntry, context, prune);
1517 }
1518
1519 /*
1520 * Brute force: Check each shard.
1521 */
1522 return ExhaustivePrune(cacheEntry, context, prune);
1523 }
1524
1525
1526 /*
1527 * PerformCompare invokes comparator with prepared values, check for
1528 * unexpected NULL returns.
1529 */
1530 static int
PerformCompare(FunctionCallInfo compareFunctionCall)1531 PerformCompare(FunctionCallInfo compareFunctionCall)
1532 {
1533 Datum result = FunctionCallInvoke(compareFunctionCall);
1534
1535 if (compareFunctionCall->isnull)
1536 {
1537 elog(ERROR, "function %u returned NULL", compareFunctionCall->flinfo->fn_oid);
1538 }
1539
1540 return DatumGetInt32(result);
1541 }
1542
1543
1544 /*
1545 * PerformValueCompare invokes comparator with a/b, and checks for unexpected
1546 * NULL returns.
1547 */
1548 static int
PerformValueCompare(FunctionCallInfo compareFunctionCall,Datum a,Datum b)1549 PerformValueCompare(FunctionCallInfo compareFunctionCall, Datum a, Datum b)
1550 {
1551 fcSetArg(compareFunctionCall, 0, a);
1552 fcSetArg(compareFunctionCall, 1, b);
1553
1554 return PerformCompare(compareFunctionCall);
1555 }
1556
1557
1558 /*
1559 * LowerShardBoundary returns the index of the first ShardInterval that's >=
1560 * (if includeMax) or > partitionColumnValue.
1561 */
1562 static int
LowerShardBoundary(Datum partitionColumnValue,ShardInterval ** shardIntervalCache,int shardCount,FunctionCallInfo compareFunction,bool includeMax)1563 LowerShardBoundary(Datum partitionColumnValue, ShardInterval **shardIntervalCache,
1564 int shardCount, FunctionCallInfo compareFunction, bool includeMax)
1565 {
1566 int lowerBoundIndex = 0;
1567 int upperBoundIndex = shardCount;
1568
1569 Assert(shardCount != 0);
1570
1571 /* setup partitionColumnValue argument once */
1572 fcSetArg(compareFunction, 0, partitionColumnValue);
1573
1574 /*
1575 * Now we test partitionColumnValue used in where clause such as
1576 * partCol > partitionColumnValue (or partCol >= partitionColumnValue)
1577 * against four possibilities, these are:
1578 * 1) partitionColumnValue falls into a specific shard, such that:
1579 * partitionColumnValue >= shard[x].min, and
1580 * partitionColumnValue < shard[x].max (or partitionColumnValue <= shard[x].max).
1581 * 2) partitionColumnValue < shard[x].min for all the shards
1582 * 3) partitionColumnValue > shard[x].max for all the shards
1583 * 4) partitionColumnValue falls in between two shards, such that:
1584 * partitionColumnValue > shard[x].max and
1585 * partitionColumnValue < shard[x+1].min
1586 *
1587 * For 1), we find that shard in below loop using binary search and
1588 * return the index of it. For the others, see the end of this function.
1589 */
1590 while (lowerBoundIndex < upperBoundIndex)
1591 {
1592 int middleIndex = lowerBoundIndex + ((upperBoundIndex - lowerBoundIndex) / 2);
1593
1594 /* setup minValue as argument */
1595 fcSetArg(compareFunction, 1, shardIntervalCache[middleIndex]->minValue);
1596
1597 /* execute cmp(partitionValue, lowerBound) */
1598 int minValueComparison = PerformCompare(compareFunction);
1599
1600 /* and evaluate results */
1601 if (minValueComparison < 0)
1602 {
1603 /* value smaller than entire range */
1604 upperBoundIndex = middleIndex;
1605 continue;
1606 }
1607
1608 /* setup maxValue as argument */
1609 fcSetArg(compareFunction, 1, shardIntervalCache[middleIndex]->maxValue);
1610
1611 /* execute cmp(partitionValue, upperBound) */
1612 int maxValueComparison = PerformCompare(compareFunction);
1613
1614 if ((maxValueComparison == 0 && !includeMax) ||
1615 maxValueComparison > 0)
1616 {
1617 /* value bigger than entire range */
1618 lowerBoundIndex = middleIndex + 1;
1619 continue;
1620 }
1621
1622 /* partitionColumnValue falls into a specific shard, possibility 1) */
1623 return middleIndex;
1624 }
1625
1626 Assert(lowerBoundIndex == upperBoundIndex);
1627
1628 /*
1629 * If we get here, none of the ShardIntervals exactly contain the value
1630 * (we'd have hit the return middleIndex; case otherwise). Figure out
1631 * whether there's possibly any interval containing a value that's bigger
1632 * than the partition key one.
1633 *
1634 * Also note that we initialized lowerBoundIndex with 0. Similarly,
1635 * we always set it to the index of the shard that we consider as our
1636 * lower boundary during binary search.
1637 */
1638 if (lowerBoundIndex == shardCount)
1639 {
1640 /*
1641 * Since lowerBoundIndex is an inclusive index, being equal to shardCount
1642 * means all the shards have smaller values than partitionColumnValue,
1643 * which corresponds to possibility 3).
1644 * In that case, since we can't have a lower bound shard, we return
1645 * INVALID_SHARD_INDEX here.
1646 */
1647 return INVALID_SHARD_INDEX;
1648 }
1649
1650 /*
1651 * partitionColumnValue is either smaller than all the shards or falls in
1652 * between two shards, which corresponds to possibility 2) or 4).
1653 * Knowing that lowerBoundIndex is an inclusive index, we directly return
1654 * it as the index for the lower bound shard here.
1655 */
1656 return lowerBoundIndex;
1657 }
1658
1659
1660 /*
1661 * UpperShardBoundary returns the index of the last ShardInterval that's <=
1662 * (if includeMin) or < partitionColumnValue.
1663 */
1664 static int
UpperShardBoundary(Datum partitionColumnValue,ShardInterval ** shardIntervalCache,int shardCount,FunctionCallInfo compareFunction,bool includeMin)1665 UpperShardBoundary(Datum partitionColumnValue, ShardInterval **shardIntervalCache,
1666 int shardCount, FunctionCallInfo compareFunction, bool includeMin)
1667 {
1668 int lowerBoundIndex = 0;
1669 int upperBoundIndex = shardCount;
1670
1671 Assert(shardCount != 0);
1672
1673 /* setup partitionColumnValue argument once */
1674 fcSetArg(compareFunction, 0, partitionColumnValue);
1675
1676 /*
1677 * Now we test partitionColumnValue used in where clause such as
1678 * partCol < partitionColumnValue (or partCol <= partitionColumnValue)
1679 * against four possibilities, these are:
1680 * 1) partitionColumnValue falls into a specific shard, such that:
1681 * partitionColumnValue <= shard[x].max, and
1682 * partitionColumnValue > shard[x].min (or partitionColumnValue >= shard[x].min).
1683 * 2) partitionColumnValue > shard[x].max for all the shards
1684 * 3) partitionColumnValue < shard[x].min for all the shards
1685 * 4) partitionColumnValue falls in between two shards, such that:
1686 * partitionColumnValue > shard[x].max and
1687 * partitionColumnValue < shard[x+1].min
1688 *
1689 * For 1), we find that shard in below loop using binary search and
1690 * return the index of it. For the others, see the end of this function.
1691 */
1692
1693 while (lowerBoundIndex < upperBoundIndex)
1694 {
1695 int middleIndex = lowerBoundIndex + ((upperBoundIndex - lowerBoundIndex) / 2);
1696
1697 /* setup minValue as argument */
1698 fcSetArg(compareFunction, 1, shardIntervalCache[middleIndex]->minValue);
1699
1700 /* execute cmp(partitionValue, lowerBound) */
1701 int minValueComparison = PerformCompare(compareFunction);
1702
1703 /* and evaluate results */
1704 if ((minValueComparison == 0 && !includeMin) ||
1705 minValueComparison < 0)
1706 {
1707 /* value smaller than entire range */
1708 upperBoundIndex = middleIndex;
1709 continue;
1710 }
1711
1712 /* setup maxValue as argument */
1713 fcSetArg(compareFunction, 1, shardIntervalCache[middleIndex]->maxValue);
1714
1715 /* execute cmp(partitionValue, upperBound) */
1716 int maxValueComparison = PerformCompare(compareFunction);
1717
1718 if (maxValueComparison > 0)
1719 {
1720 /* value bigger than entire range */
1721 lowerBoundIndex = middleIndex + 1;
1722 continue;
1723 }
1724
1725 /* partitionColumnValue falls into a specific shard, possibility 1) */
1726 return middleIndex;
1727 }
1728
1729 Assert(lowerBoundIndex == upperBoundIndex);
1730
1731 /*
1732 * If we get here, none of the ShardIntervals exactly contain the value
1733 * (we'd have hit the return middleIndex; case otherwise). Figure out
1734 * whether there's possibly any interval containing a value that's smaller
1735 * than the partition key one.
1736 *
1737 * Also note that we initialized upperBoundIndex with shardCount. Similarly,
1738 * we always set it to the index of the next shard that we consider as our
1739 * upper boundary during binary search.
1740 */
1741 if (upperBoundIndex == 0)
1742 {
1743 /*
1744 * Since upperBoundIndex is an exclusive index, being equal to 0 means
1745 * all the shards have greater values than partitionColumnValue, which
1746 * corresponds to possibility 3).
1747 * In that case, since we can't have an upper bound shard, we return
1748 * INVALID_SHARD_INDEX here.
1749 */
1750 return INVALID_SHARD_INDEX;
1751 }
1752
1753 /*
1754 * partitionColumnValue is either greater than all the shards or falls in
1755 * between two shards, which corresponds to possibility 2) or 4).
1756 * Knowing that upperBoundIndex is an exclusive index, we return the index
1757 * for the previous shard here.
1758 */
1759 return upperBoundIndex - 1;
1760 }
1761
1762
1763 /*
1764 * PruneWithBoundaries searches for shards that match inequality constraints,
1765 * using binary search on both the upper and lower boundary, and returns a
1766 * list of surviving shards.
1767 */
1768 static List *
PruneWithBoundaries(CitusTableCacheEntry * cacheEntry,ClauseWalkerContext * context,PruningInstance * prune)1769 PruneWithBoundaries(CitusTableCacheEntry *cacheEntry, ClauseWalkerContext *context,
1770 PruningInstance *prune)
1771 {
1772 List *remainingShardList = NIL;
1773 int shardCount = cacheEntry->shardIntervalArrayLength;
1774 ShardInterval **sortedShardIntervalArray = cacheEntry->sortedShardIntervalArray;
1775 bool hasLowerBound = false;
1776 bool hasUpperBound = false;
1777 Datum lowerBound = 0;
1778 Datum upperBound = 0;
1779 bool lowerBoundInclusive = false;
1780 bool upperBoundInclusive = false;
1781 int lowerBoundIdx = -1;
1782 int upperBoundIdx = -1;
1783 FunctionCallInfo compareFunctionCall = (FunctionCallInfo) &
1784 context->compareIntervalFunctionCall;
1785
1786 if (prune->greaterEqualConsts)
1787 {
1788 lowerBound = prune->greaterEqualConsts->constvalue;
1789 lowerBoundInclusive = true;
1790 hasLowerBound = true;
1791 }
1792 if (prune->greaterConsts)
1793 {
1794 /*
1795 * Use the more restrictive one, if both greater and greaterEqual
1796 * constraints are specified.
1797 */
1798 if (!hasLowerBound ||
1799 PerformValueCompare(compareFunctionCall,
1800 prune->greaterConsts->constvalue,
1801 lowerBound) >= 0)
1802 {
1803 lowerBound = prune->greaterConsts->constvalue;
1804 lowerBoundInclusive = false;
1805 hasLowerBound = true;
1806 }
1807 }
1808 if (prune->lessEqualConsts)
1809 {
1810 upperBound = prune->lessEqualConsts->constvalue;
1811 upperBoundInclusive = true;
1812 hasUpperBound = true;
1813 }
1814 if (prune->lessConsts)
1815 {
1816 /*
1817 * Use the more restrictive one, if both less and lessEqual
1818 * constraints are specified.
1819 */
1820 if (!hasUpperBound ||
1821 PerformValueCompare(compareFunctionCall,
1822 prune->lessConsts->constvalue,
1823 upperBound) <= 0)
1824 {
1825 upperBound = prune->lessConsts->constvalue;
1826 upperBoundInclusive = false;
1827 hasUpperBound = true;
1828 }
1829 }
1830
1831 Assert(hasLowerBound || hasUpperBound);
1832
1833 /* find lower bound */
1834 if (hasLowerBound)
1835 {
1836 lowerBoundIdx = LowerShardBoundary(lowerBound, sortedShardIntervalArray,
1837 shardCount, compareFunctionCall,
1838 lowerBoundInclusive);
1839 }
1840 else
1841 {
1842 lowerBoundIdx = 0;
1843 }
1844
1845 /* find upper bound */
1846 if (hasUpperBound)
1847 {
1848 upperBoundIdx = UpperShardBoundary(upperBound, sortedShardIntervalArray,
1849 shardCount, compareFunctionCall,
1850 upperBoundInclusive);
1851 }
1852 else
1853 {
1854 upperBoundIdx = shardCount - 1;
1855 }
1856
1857 if (lowerBoundIdx == INVALID_SHARD_INDEX)
1858 {
1859 return NIL;
1860 }
1861 else if (upperBoundIdx == INVALID_SHARD_INDEX)
1862 {
1863 return NIL;
1864 }
1865
1866 /*
1867 * Build list of all shards that are in the range of shards (possibly 0).
1868 */
1869 for (int curIdx = lowerBoundIdx; curIdx <= upperBoundIdx; curIdx++)
1870 {
1871 remainingShardList = lappend(remainingShardList,
1872 sortedShardIntervalArray[curIdx]);
1873 }
1874
1875 return remainingShardList;
1876 }
1877
1878
1879 /*
1880 * ExhaustivePrune returns a list of shards matching PruningInstances
1881 * constraints, by simply checking them for each individual shard.
1882 */
1883 static List *
ExhaustivePrune(CitusTableCacheEntry * cacheEntry,ClauseWalkerContext * context,PruningInstance * prune)1884 ExhaustivePrune(CitusTableCacheEntry *cacheEntry, ClauseWalkerContext *context,
1885 PruningInstance *prune)
1886 {
1887 List *remainingShardList = NIL;
1888 int shardCount = cacheEntry->shardIntervalArrayLength;
1889 ShardInterval **sortedShardIntervalArray = cacheEntry->sortedShardIntervalArray;
1890
1891 for (int curIdx = 0; curIdx < shardCount; curIdx++)
1892 {
1893 ShardInterval *curInterval = sortedShardIntervalArray[curIdx];
1894
1895 if (!ExhaustivePruneOne(curInterval, context, prune))
1896 {
1897 remainingShardList = lappend(remainingShardList, curInterval);
1898 }
1899 }
1900
1901 return remainingShardList;
1902 }
1903
1904
1905 /*
1906 * ExhaustivePruneOne returns whether curInterval is pruned away.
1907 */
1908 static bool
ExhaustivePruneOne(ShardInterval * curInterval,ClauseWalkerContext * context,PruningInstance * prune)1909 ExhaustivePruneOne(ShardInterval *curInterval,
1910 ClauseWalkerContext *context,
1911 PruningInstance *prune)
1912 {
1913 FunctionCallInfo compareFunctionCall = (FunctionCallInfo) &
1914 context->compareIntervalFunctionCall;
1915 Datum compareWith = 0;
1916
1917 /* NULL boundaries can't be compared to */
1918 if (!curInterval->minValueExists || !curInterval->maxValueExists)
1919 {
1920 return false;
1921 }
1922
1923 if (prune->equalConsts)
1924 {
1925 compareWith = prune->equalConsts->constvalue;
1926
1927 if (PerformValueCompare(compareFunctionCall,
1928 compareWith,
1929 curInterval->minValue) < 0)
1930 {
1931 return true;
1932 }
1933
1934 if (PerformValueCompare(compareFunctionCall,
1935 compareWith,
1936 curInterval->maxValue) > 0)
1937 {
1938 return true;
1939 }
1940 }
1941 if (prune->greaterEqualConsts)
1942 {
1943 compareWith = prune->greaterEqualConsts->constvalue;
1944
1945 if (PerformValueCompare(compareFunctionCall,
1946 curInterval->maxValue,
1947 compareWith) < 0)
1948 {
1949 return true;
1950 }
1951 }
1952 if (prune->greaterConsts)
1953 {
1954 compareWith = prune->greaterConsts->constvalue;
1955
1956 if (PerformValueCompare(compareFunctionCall,
1957 curInterval->maxValue,
1958 compareWith) <= 0)
1959 {
1960 return true;
1961 }
1962 }
1963 if (prune->lessEqualConsts)
1964 {
1965 compareWith = prune->lessEqualConsts->constvalue;
1966
1967 if (PerformValueCompare(compareFunctionCall,
1968 curInterval->minValue,
1969 compareWith) > 0)
1970 {
1971 return true;
1972 }
1973 }
1974 if (prune->lessConsts)
1975 {
1976 compareWith = prune->lessConsts->constvalue;
1977
1978 if (PerformValueCompare(compareFunctionCall,
1979 curInterval->minValue,
1980 compareWith) >= 0)
1981 {
1982 return true;
1983 }
1984 }
1985
1986 return false;
1987 }
1988
1989
1990 /*
1991 * Helper for creating a node for pruning tree
1992 */
1993 static PruningTreeNode *
CreatePruningNode(BoolExprType boolop)1994 CreatePruningNode(BoolExprType boolop)
1995 {
1996 PruningTreeNode *node = palloc0(sizeof(PruningTreeNode));
1997 node->boolop = boolop;
1998 node->childBooleanNodes = NIL;
1999 node->validConstraints = NIL;
2000 node->hasInvalidConstraints = false;
2001 return node;
2002 }
2003
2004
2005 /*
2006 * SAORestrictionArrayEqualityOp creates an equality operator
2007 * for a single element of a scalar array constraint.
2008 */
2009 static OpExpr *
SAORestrictionArrayEqualityOp(ScalarArrayOpExpr * arrayOperatorExpression,Var * partitionColumn)2010 SAORestrictionArrayEqualityOp(ScalarArrayOpExpr *arrayOperatorExpression,
2011 Var *partitionColumn)
2012 {
2013 OpExpr *arrayEqualityOp = makeNode(OpExpr);
2014 arrayEqualityOp->opno = arrayOperatorExpression->opno;
2015 arrayEqualityOp->opfuncid = arrayOperatorExpression->opfuncid;
2016 arrayEqualityOp->inputcollid = arrayOperatorExpression->inputcollid;
2017 arrayEqualityOp->opresulttype = get_func_rettype(
2018 arrayOperatorExpression->opfuncid);
2019 arrayEqualityOp->opcollid = partitionColumn->varcollid;
2020 arrayEqualityOp->location = -1;
2021 return arrayEqualityOp;
2022 }
2023
2024
2025 /*
2026 * DebugLogNode is a helper for logging expression nodes.
2027 */
2028 static void
DebugLogNode(char * fmt,Node * node,List * deparseCtx)2029 DebugLogNode(char *fmt, Node *node, List *deparseCtx)
2030 {
2031 if (node != NULL)
2032 {
2033 char *deparsed = deparse_expression(node, deparseCtx, false, false);
2034 ereport(DEBUG3, (errmsg(fmt, deparsed)));
2035 }
2036 }
2037
2038
2039 /*
2040 * DebugLogPruningInstance is a helper for logging purning constraints.
2041 */
2042 static void
DebugLogPruningInstance(PruningInstance * pruning,List * deparseCtx)2043 DebugLogPruningInstance(PruningInstance *pruning, List *deparseCtx)
2044 {
2045 DebugLogNode("constraint value: %s",
2046 (Node *) pruning->equalConsts, deparseCtx);
2047 DebugLogNode("constraint (lt) value: %s", \
2048 (Node *) pruning->lessConsts, deparseCtx);
2049 DebugLogNode("constraint (lteq) value: %s", \
2050 (Node *) pruning->lessEqualConsts, deparseCtx);
2051 DebugLogNode("constraint (gt) value: %s", \
2052 (Node *) pruning->greaterConsts, deparseCtx);
2053 DebugLogNode("constraint (gteq) value: %s",
2054 (Node *) pruning->greaterEqualConsts, deparseCtx);
2055 }
2056
2057
2058 /*
2059 * ConstraintCount returns how many arguments this node is taking.
2060 */
2061 static int
ConstraintCount(PruningTreeNode * node)2062 ConstraintCount(PruningTreeNode *node)
2063 {
2064 return list_length(node->childBooleanNodes) +
2065 list_length(node->validConstraints) +
2066 (node->hasInvalidConstraints ? 1 : 0);
2067 }
2068