1 /*-------------------------------------------------------------------------
2  *
3  * parse_node.c
4  *	  various routines that make nodes for querytrees
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/parser/parse_node.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/htup_details.h"
18 #include "access/table.h"
19 #include "catalog/pg_type.h"
20 #include "mb/pg_wchar.h"
21 #include "nodes/makefuncs.h"
22 #include "nodes/nodeFuncs.h"
23 #include "parser/parse_coerce.h"
24 #include "parser/parse_expr.h"
25 #include "parser/parse_relation.h"
26 #include "parser/parsetree.h"
27 #include "utils/builtins.h"
28 #include "utils/int8.h"
29 #include "utils/lsyscache.h"
30 #include "utils/syscache.h"
31 #include "utils/varbit.h"
32 
33 static void pcb_error_callback(void *arg);
34 
35 
36 /*
37  * make_parsestate
38  *		Allocate and initialize a new ParseState.
39  *
40  * Caller should eventually release the ParseState via free_parsestate().
41  */
42 ParseState *
make_parsestate(ParseState * parentParseState)43 make_parsestate(ParseState *parentParseState)
44 {
45 	ParseState *pstate;
46 
47 	pstate = palloc0(sizeof(ParseState));
48 
49 	pstate->parentParseState = parentParseState;
50 
51 	/* Fill in fields that don't start at null/false/zero */
52 	pstate->p_next_resno = 1;
53 	pstate->p_resolve_unknowns = true;
54 
55 	if (parentParseState)
56 	{
57 		pstate->p_sourcetext = parentParseState->p_sourcetext;
58 		/* all hooks are copied from parent */
59 		pstate->p_pre_columnref_hook = parentParseState->p_pre_columnref_hook;
60 		pstate->p_post_columnref_hook = parentParseState->p_post_columnref_hook;
61 		pstate->p_paramref_hook = parentParseState->p_paramref_hook;
62 		pstate->p_coerce_param_hook = parentParseState->p_coerce_param_hook;
63 		pstate->p_ref_hook_state = parentParseState->p_ref_hook_state;
64 		/* query environment stays in context for the whole parse analysis */
65 		pstate->p_queryEnv = parentParseState->p_queryEnv;
66 	}
67 
68 	return pstate;
69 }
70 
71 /*
72  * free_parsestate
73  *		Release a ParseState and any subsidiary resources.
74  */
75 void
free_parsestate(ParseState * pstate)76 free_parsestate(ParseState *pstate)
77 {
78 	/*
79 	 * Check that we did not produce too many resnos; at the very least we
80 	 * cannot allow more than 2^16, since that would exceed the range of a
81 	 * AttrNumber. It seems safest to use MaxTupleAttributeNumber.
82 	 */
83 	if (pstate->p_next_resno - 1 > MaxTupleAttributeNumber)
84 		ereport(ERROR,
85 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
86 				 errmsg("target lists can have at most %d entries",
87 						MaxTupleAttributeNumber)));
88 
89 	if (pstate->p_target_relation != NULL)
90 		table_close(pstate->p_target_relation, NoLock);
91 
92 	pfree(pstate);
93 }
94 
95 
96 /*
97  * parser_errposition
98  *		Report a parse-analysis-time cursor position, if possible.
99  *
100  * This is expected to be used within an ereport() call.  The return value
101  * is a dummy (always 0, in fact).
102  *
103  * The locations stored in raw parsetrees are byte offsets into the source
104  * string.  We have to convert them to 1-based character indexes for reporting
105  * to clients.  (We do things this way to avoid unnecessary overhead in the
106  * normal non-error case: computing character indexes would be much more
107  * expensive than storing token offsets.)
108  */
109 int
parser_errposition(ParseState * pstate,int location)110 parser_errposition(ParseState *pstate, int location)
111 {
112 	int			pos;
113 
114 	/* No-op if location was not provided */
115 	if (location < 0)
116 		return 0;
117 	/* Can't do anything if source text is not available */
118 	if (pstate == NULL || pstate->p_sourcetext == NULL)
119 		return 0;
120 	/* Convert offset to character number */
121 	pos = pg_mbstrlen_with_len(pstate->p_sourcetext, location) + 1;
122 	/* And pass it to the ereport mechanism */
123 	return errposition(pos);
124 }
125 
126 
127 /*
128  * setup_parser_errposition_callback
129  *		Arrange for non-parser errors to report an error position
130  *
131  * Sometimes the parser calls functions that aren't part of the parser
132  * subsystem and can't reasonably be passed a ParseState; yet we would
133  * like any errors thrown in those functions to be tagged with a parse
134  * error location.  Use this function to set up an error context stack
135  * entry that will accomplish that.  Usage pattern:
136  *
137  *		declare a local variable "ParseCallbackState pcbstate"
138  *		...
139  *		setup_parser_errposition_callback(&pcbstate, pstate, location);
140  *		call function that might throw error;
141  *		cancel_parser_errposition_callback(&pcbstate);
142  */
143 void
setup_parser_errposition_callback(ParseCallbackState * pcbstate,ParseState * pstate,int location)144 setup_parser_errposition_callback(ParseCallbackState *pcbstate,
145 								  ParseState *pstate, int location)
146 {
147 	/* Setup error traceback support for ereport() */
148 	pcbstate->pstate = pstate;
149 	pcbstate->location = location;
150 	pcbstate->errcallback.callback = pcb_error_callback;
151 	pcbstate->errcallback.arg = (void *) pcbstate;
152 	pcbstate->errcallback.previous = error_context_stack;
153 	error_context_stack = &pcbstate->errcallback;
154 }
155 
156 /*
157  * Cancel a previously-set-up errposition callback.
158  */
159 void
cancel_parser_errposition_callback(ParseCallbackState * pcbstate)160 cancel_parser_errposition_callback(ParseCallbackState *pcbstate)
161 {
162 	/* Pop the error context stack */
163 	error_context_stack = pcbstate->errcallback.previous;
164 }
165 
166 /*
167  * Error context callback for inserting parser error location.
168  *
169  * Note that this will be called for *any* error occurring while the
170  * callback is installed.  We avoid inserting an irrelevant error location
171  * if the error is a query cancel --- are there any other important cases?
172  */
173 static void
pcb_error_callback(void * arg)174 pcb_error_callback(void *arg)
175 {
176 	ParseCallbackState *pcbstate = (ParseCallbackState *) arg;
177 
178 	if (geterrcode() != ERRCODE_QUERY_CANCELED)
179 		(void) parser_errposition(pcbstate->pstate, pcbstate->location);
180 }
181 
182 
183 /*
184  * transformContainerType()
185  *		Identify the types involved in a subscripting operation for container
186  *
187  *
188  * On entry, containerType/containerTypmod identify the type of the input value
189  * to be subscripted (which could be a domain type).  These are modified if
190  * necessary to identify the actual container type and typmod, and the
191  * container's element type is returned.  An error is thrown if the input isn't
192  * an array type.
193  */
194 Oid
transformContainerType(Oid * containerType,int32 * containerTypmod)195 transformContainerType(Oid *containerType, int32 *containerTypmod)
196 {
197 	Oid			origContainerType = *containerType;
198 	Oid			elementType;
199 	HeapTuple	type_tuple_container;
200 	Form_pg_type type_struct_container;
201 
202 	/*
203 	 * If the input is a domain, smash to base type, and extract the actual
204 	 * typmod to be applied to the base type. Subscripting a domain is an
205 	 * operation that necessarily works on the base container type, not the
206 	 * domain itself. (Note that we provide no method whereby the creator of a
207 	 * domain over a container type could hide its ability to be subscripted.)
208 	 */
209 	*containerType = getBaseTypeAndTypmod(*containerType, containerTypmod);
210 
211 	/*
212 	 * Here is an array specific code. We treat int2vector and oidvector as
213 	 * though they were domains over int2[] and oid[].  This is needed because
214 	 * array slicing could create an array that doesn't satisfy the
215 	 * dimensionality constraints of the xxxvector type; so we want the result
216 	 * of a slice operation to be considered to be of the more general type.
217 	 */
218 	if (*containerType == INT2VECTOROID)
219 		*containerType = INT2ARRAYOID;
220 	else if (*containerType == OIDVECTOROID)
221 		*containerType = OIDARRAYOID;
222 
223 	/* Get the type tuple for the container */
224 	type_tuple_container = SearchSysCache1(TYPEOID, ObjectIdGetDatum(*containerType));
225 	if (!HeapTupleIsValid(type_tuple_container))
226 		elog(ERROR, "cache lookup failed for type %u", *containerType);
227 	type_struct_container = (Form_pg_type) GETSTRUCT(type_tuple_container);
228 
229 	/* needn't check typisdefined since this will fail anyway */
230 
231 	elementType = type_struct_container->typelem;
232 	if (elementType == InvalidOid)
233 		ereport(ERROR,
234 				(errcode(ERRCODE_DATATYPE_MISMATCH),
235 				 errmsg("cannot subscript type %s because it is not an array",
236 						format_type_be(origContainerType))));
237 
238 	ReleaseSysCache(type_tuple_container);
239 
240 	return elementType;
241 }
242 
243 /*
244  * transformContainerSubscripts()
245  *		Transform container (array, etc) subscripting.  This is used for both
246  *		container fetch and container assignment.
247  *
248  * In a container fetch, we are given a source container value and we produce
249  * an expression that represents the result of extracting a single container
250  * element or a container slice.
251  *
252  * In a container assignment, we are given a destination container value plus a
253  * source value that is to be assigned to a single element or a slice of that
254  * container. We produce an expression that represents the new container value
255  * with the source data inserted into the right part of the container.
256  *
257  * For both cases, if the source container is of a domain-over-array type,
258  * the result is of the base array type or its element type; essentially,
259  * we must fold a domain to its base type before applying subscripting.
260  * (Note that int2vector and oidvector are treated as domains here.)
261  *
262  * pstate			Parse state
263  * containerBase	Already-transformed expression for the container as a whole
264  * containerType	OID of container's datatype (should match type of
265  *					containerBase, or be the base type of containerBase's
266  *					domain type)
267  * elementType		OID of container's element type (fetch with
268  *					transformContainerType, or pass InvalidOid to do it here)
269  * containerTypMod	typmod for the container (which is also typmod for the
270  *					elements)
271  * indirection		Untransformed list of subscripts (must not be NIL)
272  * assignFrom		NULL for container fetch, else transformed expression for
273  *					source.
274  */
275 SubscriptingRef *
transformContainerSubscripts(ParseState * pstate,Node * containerBase,Oid containerType,Oid elementType,int32 containerTypMod,List * indirection,Node * assignFrom)276 transformContainerSubscripts(ParseState *pstate,
277 							 Node *containerBase,
278 							 Oid containerType,
279 							 Oid elementType,
280 							 int32 containerTypMod,
281 							 List *indirection,
282 							 Node *assignFrom)
283 {
284 	bool		isSlice = false;
285 	List	   *upperIndexpr = NIL;
286 	List	   *lowerIndexpr = NIL;
287 	ListCell   *idx;
288 	SubscriptingRef *sbsref;
289 
290 	/*
291 	 * Caller may or may not have bothered to determine elementType.  Note
292 	 * that if the caller did do so, containerType/containerTypMod must be as
293 	 * modified by transformContainerType, ie, smash domain to base type.
294 	 */
295 	if (!OidIsValid(elementType))
296 		elementType = transformContainerType(&containerType, &containerTypMod);
297 
298 	/*
299 	 * A list containing only simple subscripts refers to a single container
300 	 * element.  If any of the items are slice specifiers (lower:upper), then
301 	 * the subscript expression means a container slice operation.  In this
302 	 * case, we convert any non-slice items to slices by treating the single
303 	 * subscript as the upper bound and supplying an assumed lower bound of 1.
304 	 * We have to prescan the list to see if there are any slice items.
305 	 */
306 	foreach(idx, indirection)
307 	{
308 		A_Indices  *ai = (A_Indices *) lfirst(idx);
309 
310 		if (ai->is_slice)
311 		{
312 			isSlice = true;
313 			break;
314 		}
315 	}
316 
317 	/*
318 	 * Transform the subscript expressions.
319 	 */
320 	foreach(idx, indirection)
321 	{
322 		A_Indices  *ai = lfirst_node(A_Indices, idx);
323 		Node	   *subexpr;
324 
325 		if (isSlice)
326 		{
327 			if (ai->lidx)
328 			{
329 				subexpr = transformExpr(pstate, ai->lidx, pstate->p_expr_kind);
330 				/* If it's not int4 already, try to coerce */
331 				subexpr = coerce_to_target_type(pstate,
332 												subexpr, exprType(subexpr),
333 												INT4OID, -1,
334 												COERCION_ASSIGNMENT,
335 												COERCE_IMPLICIT_CAST,
336 												-1);
337 				if (subexpr == NULL)
338 					ereport(ERROR,
339 							(errcode(ERRCODE_DATATYPE_MISMATCH),
340 							 errmsg("array subscript must have type integer"),
341 							 parser_errposition(pstate, exprLocation(ai->lidx))));
342 			}
343 			else if (!ai->is_slice)
344 			{
345 				/* Make a constant 1 */
346 				subexpr = (Node *) makeConst(INT4OID,
347 											 -1,
348 											 InvalidOid,
349 											 sizeof(int32),
350 											 Int32GetDatum(1),
351 											 false,
352 											 true); /* pass by value */
353 			}
354 			else
355 			{
356 				/* Slice with omitted lower bound, put NULL into the list */
357 				subexpr = NULL;
358 			}
359 			lowerIndexpr = lappend(lowerIndexpr, subexpr);
360 		}
361 		else
362 			Assert(ai->lidx == NULL && !ai->is_slice);
363 
364 		if (ai->uidx)
365 		{
366 			subexpr = transformExpr(pstate, ai->uidx, pstate->p_expr_kind);
367 			/* If it's not int4 already, try to coerce */
368 			subexpr = coerce_to_target_type(pstate,
369 											subexpr, exprType(subexpr),
370 											INT4OID, -1,
371 											COERCION_ASSIGNMENT,
372 											COERCE_IMPLICIT_CAST,
373 											-1);
374 			if (subexpr == NULL)
375 				ereport(ERROR,
376 						(errcode(ERRCODE_DATATYPE_MISMATCH),
377 						 errmsg("array subscript must have type integer"),
378 						 parser_errposition(pstate, exprLocation(ai->uidx))));
379 		}
380 		else
381 		{
382 			/* Slice with omitted upper bound, put NULL into the list */
383 			Assert(isSlice && ai->is_slice);
384 			subexpr = NULL;
385 		}
386 		upperIndexpr = lappend(upperIndexpr, subexpr);
387 	}
388 
389 	/*
390 	 * If doing an array store, coerce the source value to the right type.
391 	 * (This should agree with the coercion done by transformAssignedExpr.)
392 	 */
393 	if (assignFrom != NULL)
394 	{
395 		Oid			typesource = exprType(assignFrom);
396 		Oid			typeneeded = isSlice ? containerType : elementType;
397 		Node	   *newFrom;
398 
399 		newFrom = coerce_to_target_type(pstate,
400 										assignFrom, typesource,
401 										typeneeded, containerTypMod,
402 										COERCION_ASSIGNMENT,
403 										COERCE_IMPLICIT_CAST,
404 										-1);
405 		if (newFrom == NULL)
406 			ereport(ERROR,
407 					(errcode(ERRCODE_DATATYPE_MISMATCH),
408 					 errmsg("array assignment requires type %s"
409 							" but expression is of type %s",
410 							format_type_be(typeneeded),
411 							format_type_be(typesource)),
412 					 errhint("You will need to rewrite or cast the expression."),
413 					 parser_errposition(pstate, exprLocation(assignFrom))));
414 		assignFrom = newFrom;
415 	}
416 
417 	/*
418 	 * Ready to build the SubscriptingRef node.
419 	 */
420 	sbsref = (SubscriptingRef *) makeNode(SubscriptingRef);
421 	if (assignFrom != NULL)
422 		sbsref->refassgnexpr = (Expr *) assignFrom;
423 
424 	sbsref->refcontainertype = containerType;
425 	sbsref->refelemtype = elementType;
426 	sbsref->reftypmod = containerTypMod;
427 	/* refcollid will be set by parse_collate.c */
428 	sbsref->refupperindexpr = upperIndexpr;
429 	sbsref->reflowerindexpr = lowerIndexpr;
430 	sbsref->refexpr = (Expr *) containerBase;
431 	sbsref->refassgnexpr = (Expr *) assignFrom;
432 
433 	return sbsref;
434 }
435 
436 /*
437  * make_const
438  *
439  *	Convert a Value node (as returned by the grammar) to a Const node
440  *	of the "natural" type for the constant.  Note that this routine is
441  *	only used when there is no explicit cast for the constant, so we
442  *	have to guess what type is wanted.
443  *
444  *	For string literals we produce a constant of type UNKNOWN ---- whose
445  *	representation is the same as cstring, but it indicates to later type
446  *	resolution that we're not sure yet what type it should be considered.
447  *	Explicit "NULL" constants are also typed as UNKNOWN.
448  *
449  *	For integers and floats we produce int4, int8, or numeric depending
450  *	on the value of the number.  XXX We should produce int2 as well,
451  *	but additional cleanup is needed before we can do that; there are
452  *	too many examples that fail if we try.
453  */
454 Const *
make_const(ParseState * pstate,Value * value,int location)455 make_const(ParseState *pstate, Value *value, int location)
456 {
457 	Const	   *con;
458 	Datum		val;
459 	int64		val64;
460 	Oid			typeid;
461 	int			typelen;
462 	bool		typebyval;
463 	ParseCallbackState pcbstate;
464 
465 	switch (nodeTag(value))
466 	{
467 		case T_Integer:
468 			val = Int32GetDatum(intVal(value));
469 
470 			typeid = INT4OID;
471 			typelen = sizeof(int32);
472 			typebyval = true;
473 			break;
474 
475 		case T_Float:
476 			/* could be an oversize integer as well as a float ... */
477 			if (scanint8(strVal(value), true, &val64))
478 			{
479 				/*
480 				 * It might actually fit in int32. Probably only INT_MIN can
481 				 * occur, but we'll code the test generally just to be sure.
482 				 */
483 				int32		val32 = (int32) val64;
484 
485 				if (val64 == (int64) val32)
486 				{
487 					val = Int32GetDatum(val32);
488 
489 					typeid = INT4OID;
490 					typelen = sizeof(int32);
491 					typebyval = true;
492 				}
493 				else
494 				{
495 					val = Int64GetDatum(val64);
496 
497 					typeid = INT8OID;
498 					typelen = sizeof(int64);
499 					typebyval = FLOAT8PASSBYVAL;	/* int8 and float8 alike */
500 				}
501 			}
502 			else
503 			{
504 				/* arrange to report location if numeric_in() fails */
505 				setup_parser_errposition_callback(&pcbstate, pstate, location);
506 				val = DirectFunctionCall3(numeric_in,
507 										  CStringGetDatum(strVal(value)),
508 										  ObjectIdGetDatum(InvalidOid),
509 										  Int32GetDatum(-1));
510 				cancel_parser_errposition_callback(&pcbstate);
511 
512 				typeid = NUMERICOID;
513 				typelen = -1;	/* variable len */
514 				typebyval = false;
515 			}
516 			break;
517 
518 		case T_String:
519 
520 			/*
521 			 * We assume here that UNKNOWN's internal representation is the
522 			 * same as CSTRING
523 			 */
524 			val = CStringGetDatum(strVal(value));
525 
526 			typeid = UNKNOWNOID;	/* will be coerced later */
527 			typelen = -2;		/* cstring-style varwidth type */
528 			typebyval = false;
529 			break;
530 
531 		case T_BitString:
532 			/* arrange to report location if bit_in() fails */
533 			setup_parser_errposition_callback(&pcbstate, pstate, location);
534 			val = DirectFunctionCall3(bit_in,
535 									  CStringGetDatum(strVal(value)),
536 									  ObjectIdGetDatum(InvalidOid),
537 									  Int32GetDatum(-1));
538 			cancel_parser_errposition_callback(&pcbstate);
539 			typeid = BITOID;
540 			typelen = -1;
541 			typebyval = false;
542 			break;
543 
544 		case T_Null:
545 			/* return a null const */
546 			con = makeConst(UNKNOWNOID,
547 							-1,
548 							InvalidOid,
549 							-2,
550 							(Datum) 0,
551 							true,
552 							false);
553 			con->location = location;
554 			return con;
555 
556 		default:
557 			elog(ERROR, "unrecognized node type: %d", (int) nodeTag(value));
558 			return NULL;		/* keep compiler quiet */
559 	}
560 
561 	con = makeConst(typeid,
562 					-1,			/* typmod -1 is OK for all cases */
563 					InvalidOid, /* all cases are uncollatable types */
564 					typelen,
565 					val,
566 					false,
567 					typebyval);
568 	con->location = location;
569 
570 	return con;
571 }
572