1 /*-------------------------------------------------------------------------
2 *
3 * parse_node.c
4 * various routines that make nodes for querytrees
5 *
6 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/parser/parse_node.c
12 *
13 *-------------------------------------------------------------------------
14 */
15 #include "postgres.h"
16
17 #include "access/heapam.h"
18 #include "access/htup_details.h"
19 #include "catalog/pg_type.h"
20 #include "mb/pg_wchar.h"
21 #include "nodes/makefuncs.h"
22 #include "nodes/nodeFuncs.h"
23 #include "parser/parsetree.h"
24 #include "parser/parse_coerce.h"
25 #include "parser/parse_expr.h"
26 #include "parser/parse_relation.h"
27 #include "utils/builtins.h"
28 #include "utils/int8.h"
29 #include "utils/lsyscache.h"
30 #include "utils/syscache.h"
31 #include "utils/varbit.h"
32
33
34 static void pcb_error_callback(void *arg);
35
36
37 /*
38 * make_parsestate
39 * Allocate and initialize a new ParseState.
40 *
41 * Caller should eventually release the ParseState via free_parsestate().
42 */
43 ParseState *
make_parsestate(ParseState * parentParseState)44 make_parsestate(ParseState *parentParseState)
45 {
46 ParseState *pstate;
47
48 pstate = palloc0(sizeof(ParseState));
49
50 pstate->parentParseState = parentParseState;
51
52 /* Fill in fields that don't start at null/false/zero */
53 pstate->p_next_resno = 1;
54 pstate->p_resolve_unknowns = true;
55
56 if (parentParseState)
57 {
58 pstate->p_sourcetext = parentParseState->p_sourcetext;
59 /* all hooks are copied from parent */
60 pstate->p_pre_columnref_hook = parentParseState->p_pre_columnref_hook;
61 pstate->p_post_columnref_hook = parentParseState->p_post_columnref_hook;
62 pstate->p_paramref_hook = parentParseState->p_paramref_hook;
63 pstate->p_coerce_param_hook = parentParseState->p_coerce_param_hook;
64 pstate->p_ref_hook_state = parentParseState->p_ref_hook_state;
65 /* query environment stays in context for the whole parse analysis */
66 pstate->p_queryEnv = parentParseState->p_queryEnv;
67 }
68
69 return pstate;
70 }
71
72 /*
73 * free_parsestate
74 * Release a ParseState and any subsidiary resources.
75 */
76 void
free_parsestate(ParseState * pstate)77 free_parsestate(ParseState *pstate)
78 {
79 /*
80 * Check that we did not produce too many resnos; at the very least we
81 * cannot allow more than 2^16, since that would exceed the range of a
82 * AttrNumber. It seems safest to use MaxTupleAttributeNumber.
83 */
84 if (pstate->p_next_resno - 1 > MaxTupleAttributeNumber)
85 ereport(ERROR,
86 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
87 errmsg("target lists can have at most %d entries",
88 MaxTupleAttributeNumber)));
89
90 if (pstate->p_target_relation != NULL)
91 heap_close(pstate->p_target_relation, NoLock);
92
93 pfree(pstate);
94 }
95
96
97 /*
98 * parser_errposition
99 * Report a parse-analysis-time cursor position, if possible.
100 *
101 * This is expected to be used within an ereport() call. The return value
102 * is a dummy (always 0, in fact).
103 *
104 * The locations stored in raw parsetrees are byte offsets into the source
105 * string. We have to convert them to 1-based character indexes for reporting
106 * to clients. (We do things this way to avoid unnecessary overhead in the
107 * normal non-error case: computing character indexes would be much more
108 * expensive than storing token offsets.)
109 */
110 int
parser_errposition(ParseState * pstate,int location)111 parser_errposition(ParseState *pstate, int location)
112 {
113 int pos;
114
115 /* No-op if location was not provided */
116 if (location < 0)
117 return 0;
118 /* Can't do anything if source text is not available */
119 if (pstate == NULL || pstate->p_sourcetext == NULL)
120 return 0;
121 /* Convert offset to character number */
122 pos = pg_mbstrlen_with_len(pstate->p_sourcetext, location) + 1;
123 /* And pass it to the ereport mechanism */
124 return errposition(pos);
125 }
126
127
128 /*
129 * setup_parser_errposition_callback
130 * Arrange for non-parser errors to report an error position
131 *
132 * Sometimes the parser calls functions that aren't part of the parser
133 * subsystem and can't reasonably be passed a ParseState; yet we would
134 * like any errors thrown in those functions to be tagged with a parse
135 * error location. Use this function to set up an error context stack
136 * entry that will accomplish that. Usage pattern:
137 *
138 * declare a local variable "ParseCallbackState pcbstate"
139 * ...
140 * setup_parser_errposition_callback(&pcbstate, pstate, location);
141 * call function that might throw error;
142 * cancel_parser_errposition_callback(&pcbstate);
143 */
144 void
setup_parser_errposition_callback(ParseCallbackState * pcbstate,ParseState * pstate,int location)145 setup_parser_errposition_callback(ParseCallbackState *pcbstate,
146 ParseState *pstate, int location)
147 {
148 /* Setup error traceback support for ereport() */
149 pcbstate->pstate = pstate;
150 pcbstate->location = location;
151 pcbstate->errcallback.callback = pcb_error_callback;
152 pcbstate->errcallback.arg = (void *) pcbstate;
153 pcbstate->errcallback.previous = error_context_stack;
154 error_context_stack = &pcbstate->errcallback;
155 }
156
157 /*
158 * Cancel a previously-set-up errposition callback.
159 */
160 void
cancel_parser_errposition_callback(ParseCallbackState * pcbstate)161 cancel_parser_errposition_callback(ParseCallbackState *pcbstate)
162 {
163 /* Pop the error context stack */
164 error_context_stack = pcbstate->errcallback.previous;
165 }
166
167 /*
168 * Error context callback for inserting parser error location.
169 *
170 * Note that this will be called for *any* error occurring while the
171 * callback is installed. We avoid inserting an irrelevant error location
172 * if the error is a query cancel --- are there any other important cases?
173 */
174 static void
pcb_error_callback(void * arg)175 pcb_error_callback(void *arg)
176 {
177 ParseCallbackState *pcbstate = (ParseCallbackState *) arg;
178
179 if (geterrcode() != ERRCODE_QUERY_CANCELED)
180 (void) parser_errposition(pcbstate->pstate, pcbstate->location);
181 }
182
183
184 /*
185 * make_var
186 * Build a Var node for an attribute identified by RTE and attrno
187 */
188 Var *
make_var(ParseState * pstate,RangeTblEntry * rte,int attrno,int location)189 make_var(ParseState *pstate, RangeTblEntry *rte, int attrno, int location)
190 {
191 Var *result;
192 int vnum,
193 sublevels_up;
194 Oid vartypeid;
195 int32 type_mod;
196 Oid varcollid;
197
198 vnum = RTERangeTablePosn(pstate, rte, &sublevels_up);
199 get_rte_attribute_type(rte, attrno, &vartypeid, &type_mod, &varcollid);
200 result = makeVar(vnum, attrno, vartypeid, type_mod, varcollid, sublevels_up);
201 result->location = location;
202 return result;
203 }
204
205 /*
206 * transformArrayType()
207 * Identify the types involved in a subscripting operation
208 *
209 * On entry, arrayType/arrayTypmod identify the type of the input value
210 * to be subscripted (which could be a domain type). These are modified
211 * if necessary to identify the actual array type and typmod, and the
212 * array's element type is returned. An error is thrown if the input isn't
213 * an array type.
214 */
215 Oid
transformArrayType(Oid * arrayType,int32 * arrayTypmod)216 transformArrayType(Oid *arrayType, int32 *arrayTypmod)
217 {
218 Oid origArrayType = *arrayType;
219 Oid elementType;
220 HeapTuple type_tuple_array;
221 Form_pg_type type_struct_array;
222
223 /*
224 * If the input is a domain, smash to base type, and extract the actual
225 * typmod to be applied to the base type. Subscripting a domain is an
226 * operation that necessarily works on the base array type, not the domain
227 * itself. (Note that we provide no method whereby the creator of a
228 * domain over an array type could hide its ability to be subscripted.)
229 */
230 *arrayType = getBaseTypeAndTypmod(*arrayType, arrayTypmod);
231
232 /*
233 * We treat int2vector and oidvector as though they were domains over
234 * int2[] and oid[]. This is needed because array slicing could create an
235 * array that doesn't satisfy the dimensionality constraints of the
236 * xxxvector type; so we want the result of a slice operation to be
237 * considered to be of the more general type.
238 */
239 if (*arrayType == INT2VECTOROID)
240 *arrayType = INT2ARRAYOID;
241 else if (*arrayType == OIDVECTOROID)
242 *arrayType = OIDARRAYOID;
243
244 /* Get the type tuple for the array */
245 type_tuple_array = SearchSysCache1(TYPEOID, ObjectIdGetDatum(*arrayType));
246 if (!HeapTupleIsValid(type_tuple_array))
247 elog(ERROR, "cache lookup failed for type %u", *arrayType);
248 type_struct_array = (Form_pg_type) GETSTRUCT(type_tuple_array);
249
250 /* needn't check typisdefined since this will fail anyway */
251
252 elementType = type_struct_array->typelem;
253 if (elementType == InvalidOid)
254 ereport(ERROR,
255 (errcode(ERRCODE_DATATYPE_MISMATCH),
256 errmsg("cannot subscript type %s because it is not an array",
257 format_type_be(origArrayType))));
258
259 ReleaseSysCache(type_tuple_array);
260
261 return elementType;
262 }
263
264 /*
265 * transformArraySubscripts()
266 * Transform array subscripting. This is used for both
267 * array fetch and array assignment.
268 *
269 * In an array fetch, we are given a source array value and we produce an
270 * expression that represents the result of extracting a single array element
271 * or an array slice.
272 *
273 * In an array assignment, we are given a destination array value plus a
274 * source value that is to be assigned to a single element or a slice of
275 * that array. We produce an expression that represents the new array value
276 * with the source data inserted into the right part of the array.
277 *
278 * For both cases, if the source array is of a domain-over-array type,
279 * the result is of the base array type or its element type; essentially,
280 * we must fold a domain to its base type before applying subscripting.
281 * (Note that int2vector and oidvector are treated as domains here.)
282 *
283 * pstate Parse state
284 * arrayBase Already-transformed expression for the array as a whole
285 * arrayType OID of array's datatype (should match type of arrayBase,
286 * or be the base type of arrayBase's domain type)
287 * elementType OID of array's element type (fetch with transformArrayType,
288 * or pass InvalidOid to do it here)
289 * arrayTypMod typmod for the array (which is also typmod for the elements)
290 * indirection Untransformed list of subscripts (must not be NIL)
291 * assignFrom NULL for array fetch, else transformed expression for source.
292 */
293 ArrayRef *
transformArraySubscripts(ParseState * pstate,Node * arrayBase,Oid arrayType,Oid elementType,int32 arrayTypMod,List * indirection,Node * assignFrom)294 transformArraySubscripts(ParseState *pstate,
295 Node *arrayBase,
296 Oid arrayType,
297 Oid elementType,
298 int32 arrayTypMod,
299 List *indirection,
300 Node *assignFrom)
301 {
302 bool isSlice = false;
303 List *upperIndexpr = NIL;
304 List *lowerIndexpr = NIL;
305 ListCell *idx;
306 ArrayRef *aref;
307
308 /*
309 * Caller may or may not have bothered to determine elementType. Note
310 * that if the caller did do so, arrayType/arrayTypMod must be as modified
311 * by transformArrayType, ie, smash domain to base type.
312 */
313 if (!OidIsValid(elementType))
314 elementType = transformArrayType(&arrayType, &arrayTypMod);
315
316 /*
317 * A list containing only simple subscripts refers to a single array
318 * element. If any of the items are slice specifiers (lower:upper), then
319 * the subscript expression means an array slice operation. In this case,
320 * we convert any non-slice items to slices by treating the single
321 * subscript as the upper bound and supplying an assumed lower bound of 1.
322 * We have to prescan the list to see if there are any slice items.
323 */
324 foreach(idx, indirection)
325 {
326 A_Indices *ai = (A_Indices *) lfirst(idx);
327
328 if (ai->is_slice)
329 {
330 isSlice = true;
331 break;
332 }
333 }
334
335 /*
336 * Transform the subscript expressions.
337 */
338 foreach(idx, indirection)
339 {
340 A_Indices *ai = lfirst_node(A_Indices, idx);
341 Node *subexpr;
342
343 if (isSlice)
344 {
345 if (ai->lidx)
346 {
347 subexpr = transformExpr(pstate, ai->lidx, pstate->p_expr_kind);
348 /* If it's not int4 already, try to coerce */
349 subexpr = coerce_to_target_type(pstate,
350 subexpr, exprType(subexpr),
351 INT4OID, -1,
352 COERCION_ASSIGNMENT,
353 COERCE_IMPLICIT_CAST,
354 -1);
355 if (subexpr == NULL)
356 ereport(ERROR,
357 (errcode(ERRCODE_DATATYPE_MISMATCH),
358 errmsg("array subscript must have type integer"),
359 parser_errposition(pstate, exprLocation(ai->lidx))));
360 }
361 else if (!ai->is_slice)
362 {
363 /* Make a constant 1 */
364 subexpr = (Node *) makeConst(INT4OID,
365 -1,
366 InvalidOid,
367 sizeof(int32),
368 Int32GetDatum(1),
369 false,
370 true); /* pass by value */
371 }
372 else
373 {
374 /* Slice with omitted lower bound, put NULL into the list */
375 subexpr = NULL;
376 }
377 lowerIndexpr = lappend(lowerIndexpr, subexpr);
378 }
379 else
380 Assert(ai->lidx == NULL && !ai->is_slice);
381
382 if (ai->uidx)
383 {
384 subexpr = transformExpr(pstate, ai->uidx, pstate->p_expr_kind);
385 /* If it's not int4 already, try to coerce */
386 subexpr = coerce_to_target_type(pstate,
387 subexpr, exprType(subexpr),
388 INT4OID, -1,
389 COERCION_ASSIGNMENT,
390 COERCE_IMPLICIT_CAST,
391 -1);
392 if (subexpr == NULL)
393 ereport(ERROR,
394 (errcode(ERRCODE_DATATYPE_MISMATCH),
395 errmsg("array subscript must have type integer"),
396 parser_errposition(pstate, exprLocation(ai->uidx))));
397 }
398 else
399 {
400 /* Slice with omitted upper bound, put NULL into the list */
401 Assert(isSlice && ai->is_slice);
402 subexpr = NULL;
403 }
404 upperIndexpr = lappend(upperIndexpr, subexpr);
405 }
406
407 /*
408 * If doing an array store, coerce the source value to the right type.
409 * (This should agree with the coercion done by transformAssignedExpr.)
410 */
411 if (assignFrom != NULL)
412 {
413 Oid typesource = exprType(assignFrom);
414 Oid typeneeded = isSlice ? arrayType : elementType;
415 Node *newFrom;
416
417 newFrom = coerce_to_target_type(pstate,
418 assignFrom, typesource,
419 typeneeded, arrayTypMod,
420 COERCION_ASSIGNMENT,
421 COERCE_IMPLICIT_CAST,
422 -1);
423 if (newFrom == NULL)
424 ereport(ERROR,
425 (errcode(ERRCODE_DATATYPE_MISMATCH),
426 errmsg("array assignment requires type %s"
427 " but expression is of type %s",
428 format_type_be(typeneeded),
429 format_type_be(typesource)),
430 errhint("You will need to rewrite or cast the expression."),
431 parser_errposition(pstate, exprLocation(assignFrom))));
432 assignFrom = newFrom;
433 }
434
435 /*
436 * Ready to build the ArrayRef node.
437 */
438 aref = makeNode(ArrayRef);
439 aref->refarraytype = arrayType;
440 aref->refelemtype = elementType;
441 aref->reftypmod = arrayTypMod;
442 /* refcollid will be set by parse_collate.c */
443 aref->refupperindexpr = upperIndexpr;
444 aref->reflowerindexpr = lowerIndexpr;
445 aref->refexpr = (Expr *) arrayBase;
446 aref->refassgnexpr = (Expr *) assignFrom;
447
448 return aref;
449 }
450
451 /*
452 * make_const
453 *
454 * Convert a Value node (as returned by the grammar) to a Const node
455 * of the "natural" type for the constant. Note that this routine is
456 * only used when there is no explicit cast for the constant, so we
457 * have to guess what type is wanted.
458 *
459 * For string literals we produce a constant of type UNKNOWN ---- whose
460 * representation is the same as cstring, but it indicates to later type
461 * resolution that we're not sure yet what type it should be considered.
462 * Explicit "NULL" constants are also typed as UNKNOWN.
463 *
464 * For integers and floats we produce int4, int8, or numeric depending
465 * on the value of the number. XXX We should produce int2 as well,
466 * but additional cleanup is needed before we can do that; there are
467 * too many examples that fail if we try.
468 */
469 Const *
make_const(ParseState * pstate,Value * value,int location)470 make_const(ParseState *pstate, Value *value, int location)
471 {
472 Const *con;
473 Datum val;
474 int64 val64;
475 Oid typeid;
476 int typelen;
477 bool typebyval;
478 ParseCallbackState pcbstate;
479
480 switch (nodeTag(value))
481 {
482 case T_Integer:
483 val = Int32GetDatum(intVal(value));
484
485 typeid = INT4OID;
486 typelen = sizeof(int32);
487 typebyval = true;
488 break;
489
490 case T_Float:
491 /* could be an oversize integer as well as a float ... */
492 if (scanint8(strVal(value), true, &val64))
493 {
494 /*
495 * It might actually fit in int32. Probably only INT_MIN can
496 * occur, but we'll code the test generally just to be sure.
497 */
498 int32 val32 = (int32) val64;
499
500 if (val64 == (int64) val32)
501 {
502 val = Int32GetDatum(val32);
503
504 typeid = INT4OID;
505 typelen = sizeof(int32);
506 typebyval = true;
507 }
508 else
509 {
510 val = Int64GetDatum(val64);
511
512 typeid = INT8OID;
513 typelen = sizeof(int64);
514 typebyval = FLOAT8PASSBYVAL; /* int8 and float8 alike */
515 }
516 }
517 else
518 {
519 /* arrange to report location if numeric_in() fails */
520 setup_parser_errposition_callback(&pcbstate, pstate, location);
521 val = DirectFunctionCall3(numeric_in,
522 CStringGetDatum(strVal(value)),
523 ObjectIdGetDatum(InvalidOid),
524 Int32GetDatum(-1));
525 cancel_parser_errposition_callback(&pcbstate);
526
527 typeid = NUMERICOID;
528 typelen = -1; /* variable len */
529 typebyval = false;
530 }
531 break;
532
533 case T_String:
534
535 /*
536 * We assume here that UNKNOWN's internal representation is the
537 * same as CSTRING
538 */
539 val = CStringGetDatum(strVal(value));
540
541 typeid = UNKNOWNOID; /* will be coerced later */
542 typelen = -2; /* cstring-style varwidth type */
543 typebyval = false;
544 break;
545
546 case T_BitString:
547 /* arrange to report location if bit_in() fails */
548 setup_parser_errposition_callback(&pcbstate, pstate, location);
549 val = DirectFunctionCall3(bit_in,
550 CStringGetDatum(strVal(value)),
551 ObjectIdGetDatum(InvalidOid),
552 Int32GetDatum(-1));
553 cancel_parser_errposition_callback(&pcbstate);
554 typeid = BITOID;
555 typelen = -1;
556 typebyval = false;
557 break;
558
559 case T_Null:
560 /* return a null const */
561 con = makeConst(UNKNOWNOID,
562 -1,
563 InvalidOid,
564 -2,
565 (Datum) 0,
566 true,
567 false);
568 con->location = location;
569 return con;
570
571 default:
572 elog(ERROR, "unrecognized node type: %d", (int) nodeTag(value));
573 return NULL; /* keep compiler quiet */
574 }
575
576 con = makeConst(typeid,
577 -1, /* typmod -1 is OK for all cases */
578 InvalidOid, /* all cases are uncollatable types */
579 typelen,
580 val,
581 false,
582 typebyval);
583 con->location = location;
584
585 return con;
586 }
587