1 /*-------------------------------------------------------------------------
2  *
3  * parse_node.c
4  *	  various routines that make nodes for querytrees
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/parser/parse_node.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/htup_details.h"
18 #include "access/table.h"
19 #include "catalog/pg_type.h"
20 #include "mb/pg_wchar.h"
21 #include "nodes/makefuncs.h"
22 #include "nodes/nodeFuncs.h"
23 #include "parser/parsetree.h"
24 #include "parser/parse_coerce.h"
25 #include "parser/parse_expr.h"
26 #include "parser/parse_relation.h"
27 #include "utils/builtins.h"
28 #include "utils/int8.h"
29 #include "utils/lsyscache.h"
30 #include "utils/syscache.h"
31 #include "utils/varbit.h"
32 
33 
34 static void pcb_error_callback(void *arg);
35 
36 
37 /*
38  * make_parsestate
getEvent()39  *		Allocate and initialize a new ParseState.
40  *
41  * Caller should eventually release the ParseState via free_parsestate().
42  */
43 ParseState *
44 make_parsestate(ParseState *parentParseState)
45 {
46 	ParseState *pstate;
47 
48 	pstate = palloc0(sizeof(ParseState));
49 
50 	pstate->parentParseState = parentParseState;
51 
52 	/* Fill in fields that don't start at null/false/zero */
53 	pstate->p_next_resno = 1;
54 	pstate->p_resolve_unknowns = true;
55 
56 	if (parentParseState)
57 	{
58 		pstate->p_sourcetext = parentParseState->p_sourcetext;
59 		/* all hooks are copied from parent */
60 		pstate->p_pre_columnref_hook = parentParseState->p_pre_columnref_hook;
61 		pstate->p_post_columnref_hook = parentParseState->p_post_columnref_hook;
62 		pstate->p_paramref_hook = parentParseState->p_paramref_hook;
63 		pstate->p_coerce_param_hook = parentParseState->p_coerce_param_hook;
64 		pstate->p_ref_hook_state = parentParseState->p_ref_hook_state;
65 		/* query environment stays in context for the whole parse analysis */
66 		pstate->p_queryEnv = parentParseState->p_queryEnv;
67 	}
68 
69 	return pstate;
70 }
71 
72 /*
73  * free_parsestate
74  *		Release a ParseState and any subsidiary resources.
75  */
76 void
77 free_parsestate(ParseState *pstate)
78 {
79 	/*
80 	 * Check that we did not produce too many resnos; at the very least we
81 	 * cannot allow more than 2^16, since that would exceed the range of a
82 	 * AttrNumber. It seems safest to use MaxTupleAttributeNumber.
83 	 */
84 	if (pstate->p_next_resno - 1 > MaxTupleAttributeNumber)
85 		ereport(ERROR,
86 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
87 				 errmsg("target lists can have at most %d entries",
88 						MaxTupleAttributeNumber)));
89 
90 	if (pstate->p_target_relation != NULL)
91 		table_close(pstate->p_target_relation, NoLock);
92 
93 	pfree(pstate);
94 }
95 
96 
97 /*
98  * parser_errposition
99  *		Report a parse-analysis-time cursor position, if possible.
100  *
101  * This is expected to be used within an ereport() call.  The return value
102  * is a dummy (always 0, in fact).
103  *
104  * The locations stored in raw parsetrees are byte offsets into the source
105  * string.  We have to convert them to 1-based character indexes for reporting
106  * to clients.  (We do things this way to avoid unnecessary overhead in the
107  * normal non-error case: computing character indexes would be much more
108  * expensive than storing token offsets.)
109  */
110 int
111 parser_errposition(ParseState *pstate, int location)
112 {
113 	int			pos;
114 
115 	/* No-op if location was not provided */
116 	if (location < 0)
117 		return 0;
118 	/* Can't do anything if source text is not available */
119 	if (pstate == NULL || pstate->p_sourcetext == NULL)
120 		return 0;
121 	/* Convert offset to character number */
122 	pos = pg_mbstrlen_with_len(pstate->p_sourcetext, location) + 1;
123 	/* And pass it to the ereport mechanism */
124 	return errposition(pos);
125 }
126 
127 
128 /*
129  * setup_parser_errposition_callback
130  *		Arrange for non-parser errors to report an error position
131  *
132  * Sometimes the parser calls functions that aren't part of the parser
133  * subsystem and can't reasonably be passed a ParseState; yet we would
134  * like any errors thrown in those functions to be tagged with a parse
135  * error location.  Use this function to set up an error context stack
136  * entry that will accomplish that.  Usage pattern:
137  *
138  *		declare a local variable "ParseCallbackState pcbstate"
139  *		...
140  *		setup_parser_errposition_callback(&pcbstate, pstate, location);
141  *		call function that might throw error;
142  *		cancel_parser_errposition_callback(&pcbstate);
143  */
144 void
145 setup_parser_errposition_callback(ParseCallbackState *pcbstate,
146 								  ParseState *pstate, int location)
147 {
148 	/* Setup error traceback support for ereport() */
149 	pcbstate->pstate = pstate;
150 	pcbstate->location = location;
151 	pcbstate->errcallback.callback = pcb_error_callback;
152 	pcbstate->errcallback.arg = (void *) pcbstate;
153 	pcbstate->errcallback.previous = error_context_stack;
154 	error_context_stack = &pcbstate->errcallback;
155 }
156 
157 /*
158  * Cancel a previously-set-up errposition callback.
159  */
160 void
161 cancel_parser_errposition_callback(ParseCallbackState *pcbstate)
162 {
163 	/* Pop the error context stack */
164 	error_context_stack = pcbstate->errcallback.previous;
165 }
166 
167 /*
168  * Error context callback for inserting parser error location.
169  *
170  * Note that this will be called for *any* error occurring while the
171  * callback is installed.  We avoid inserting an irrelevant error location
172  * if the error is a query cancel --- are there any other important cases?
173  */
174 static void
175 pcb_error_callback(void *arg)
176 {
177 	ParseCallbackState *pcbstate = (ParseCallbackState *) arg;
178 
179 	if (geterrcode() != ERRCODE_QUERY_CANCELED)
180 		(void) parser_errposition(pcbstate->pstate, pcbstate->location);
181 }
182 
183 
184 /*
185  * make_var
186  *		Build a Var node for an attribute identified by RTE and attrno
187  */
188 Var *
189 make_var(ParseState *pstate, RangeTblEntry *rte, int attrno, int location)
190 {
191 	Var		   *result;
192 	int			vnum,
193 				sublevels_up;
194 	Oid			vartypeid;
195 	int32		type_mod;
196 	Oid			varcollid;
197 
198 	vnum = RTERangeTablePosn(pstate, rte, &sublevels_up);
199 	get_rte_attribute_type(rte, attrno, &vartypeid, &type_mod, &varcollid);
200 	result = makeVar(vnum, attrno, vartypeid, type_mod, varcollid, sublevels_up);
201 	result->location = location;
202 	return result;
203 }
204 
205 /*
206  * transformContainerType()
207  *		Identify the types involved in a subscripting operation for container
208  *
209  *
210  * On entry, containerType/containerTypmod identify the type of the input value
211  * to be subscripted (which could be a domain type).  These are modified if
212  * necessary to identify the actual container type and typmod, and the
213  * container's element type is returned.  An error is thrown if the input isn't
214  * an array type.
215  */
216 Oid
217 transformContainerType(Oid *containerType, int32 *containerTypmod)
218 {
219 	Oid			origContainerType = *containerType;
220 	Oid			elementType;
221 	HeapTuple	type_tuple_container;
222 	Form_pg_type type_struct_container;
223 
224 	/*
225 	 * If the input is a domain, smash to base type, and extract the actual
226 	 * typmod to be applied to the base type. Subscripting a domain is an
227 	 * operation that necessarily works on the base container type, not the
228 	 * domain itself. (Note that we provide no method whereby the creator of a
229 	 * domain over a container type could hide its ability to be subscripted.)
230 	 */
231 	*containerType = getBaseTypeAndTypmod(*containerType, containerTypmod);
232 
233 	/*
234 	 * Here is an array specific code. We treat int2vector and oidvector as
235 	 * though they were domains over int2[] and oid[].  This is needed because
236 	 * array slicing could create an array that doesn't satisfy the
237 	 * dimensionality constraints of the xxxvector type; so we want the result
238 	 * of a slice operation to be considered to be of the more general type.
239 	 */
240 	if (*containerType == INT2VECTOROID)
241 		*containerType = INT2ARRAYOID;
242 	else if (*containerType == OIDVECTOROID)
243 		*containerType = OIDARRAYOID;
244 
245 	/* Get the type tuple for the container */
246 	type_tuple_container = SearchSysCache1(TYPEOID, ObjectIdGetDatum(*containerType));
247 	if (!HeapTupleIsValid(type_tuple_container))
248 		elog(ERROR, "cache lookup failed for type %u", *containerType);
249 	type_struct_container = (Form_pg_type) GETSTRUCT(type_tuple_container);
250 
251 	/* needn't check typisdefined since this will fail anyway */
252 
253 	elementType = type_struct_container->typelem;
254 	if (elementType == InvalidOid)
255 		ereport(ERROR,
256 				(errcode(ERRCODE_DATATYPE_MISMATCH),
257 				 errmsg("cannot subscript type %s because it is not an array",
258 						format_type_be(origContainerType))));
259 
260 	ReleaseSysCache(type_tuple_container);
261 
262 	return elementType;
263 }
264 
265 /*
266  * transformContainerSubscripts()
267  *		Transform container (array, etc) subscripting.  This is used for both
268  *		container fetch and container assignment.
269  *
270  * In a container fetch, we are given a source container value and we produce
271  * an expression that represents the result of extracting a single container
272  * element or a container slice.
273  *
274  * In a container assignment, we are given a destination container value plus a
275  * source value that is to be assigned to a single element or a slice of that
276  * container. We produce an expression that represents the new container value
277  * with the source data inserted into the right part of the container.
278  *
279  * For both cases, if the source container is of a domain-over-array type,
280  * the result is of the base array type or its element type; essentially,
281  * we must fold a domain to its base type before applying subscripting.
282  * (Note that int2vector and oidvector are treated as domains here.)
283  *
284  * pstate			Parse state
285  * containerBase	Already-transformed expression for the container as a whole
286  * containerType	OID of container's datatype (should match type of
287  *					containerBase, or be the base type of containerBase's
288  *					domain type)
289  * elementType		OID of container's element type (fetch with
290  *					transformContainerType, or pass InvalidOid to do it here)
291  * containerTypMod	typmod for the container (which is also typmod for the
292  *					elements)
293  * indirection		Untransformed list of subscripts (must not be NIL)
294  * assignFrom		NULL for container fetch, else transformed expression for
295  *					source.
296  */
297 SubscriptingRef *
298 transformContainerSubscripts(ParseState *pstate,
299 							 Node *containerBase,
300 							 Oid containerType,
301 							 Oid elementType,
302 							 int32 containerTypMod,
303 							 List *indirection,
304 							 Node *assignFrom)
305 {
306 	bool		isSlice = false;
307 	List	   *upperIndexpr = NIL;
308 	List	   *lowerIndexpr = NIL;
309 	ListCell   *idx;
310 	SubscriptingRef *sbsref;
311 
312 	/*
313 	 * Caller may or may not have bothered to determine elementType.  Note
314 	 * that if the caller did do so, containerType/containerTypMod must be as
315 	 * modified by transformContainerType, ie, smash domain to base type.
316 	 */
317 	if (!OidIsValid(elementType))
318 		elementType = transformContainerType(&containerType, &containerTypMod);
319 
320 	/*
321 	 * A list containing only simple subscripts refers to a single container
322 	 * element.  If any of the items are slice specifiers (lower:upper), then
323 	 * the subscript expression means a container slice operation.  In this
324 	 * case, we convert any non-slice items to slices by treating the single
325 	 * subscript as the upper bound and supplying an assumed lower bound of 1.
326 	 * We have to prescan the list to see if there are any slice items.
327 	 */
328 	foreach(idx, indirection)
329 	{
330 		A_Indices  *ai = (A_Indices *) lfirst(idx);
331 
332 		if (ai->is_slice)
333 		{
334 			isSlice = true;
335 			break;
336 		}
337 	}
338 
339 	/*
340 	 * Transform the subscript expressions.
341 	 */
342 	foreach(idx, indirection)
343 	{
344 		A_Indices  *ai = lfirst_node(A_Indices, idx);
345 		Node	   *subexpr;
346 
347 		if (isSlice)
348 		{
349 			if (ai->lidx)
350 			{
351 				subexpr = transformExpr(pstate, ai->lidx, pstate->p_expr_kind);
352 				/* If it's not int4 already, try to coerce */
353 				subexpr = coerce_to_target_type(pstate,
354 												subexpr, exprType(subexpr),
355 												INT4OID, -1,
356 												COERCION_ASSIGNMENT,
357 												COERCE_IMPLICIT_CAST,
358 												-1);
359 				if (subexpr == NULL)
360 					ereport(ERROR,
361 							(errcode(ERRCODE_DATATYPE_MISMATCH),
362 							 errmsg("array subscript must have type integer"),
363 							 parser_errposition(pstate, exprLocation(ai->lidx))));
364 			}
365 			else if (!ai->is_slice)
366 			{
367 				/* Make a constant 1 */
368 				subexpr = (Node *) makeConst(INT4OID,
369 											 -1,
370 											 InvalidOid,
371 											 sizeof(int32),
372 											 Int32GetDatum(1),
373 											 false,
374 											 true); /* pass by value */
375 			}
376 			else
377 			{
378 				/* Slice with omitted lower bound, put NULL into the list */
379 				subexpr = NULL;
380 			}
381 			lowerIndexpr = lappend(lowerIndexpr, subexpr);
382 		}
383 		else
384 			Assert(ai->lidx == NULL && !ai->is_slice);
385 
386 		if (ai->uidx)
387 		{
388 			subexpr = transformExpr(pstate, ai->uidx, pstate->p_expr_kind);
389 			/* If it's not int4 already, try to coerce */
390 			subexpr = coerce_to_target_type(pstate,
391 											subexpr, exprType(subexpr),
392 											INT4OID, -1,
393 											COERCION_ASSIGNMENT,
394 											COERCE_IMPLICIT_CAST,
395 											-1);
396 			if (subexpr == NULL)
397 				ereport(ERROR,
398 						(errcode(ERRCODE_DATATYPE_MISMATCH),
399 						 errmsg("array subscript must have type integer"),
400 						 parser_errposition(pstate, exprLocation(ai->uidx))));
401 		}
402 		else
403 		{
404 			/* Slice with omitted upper bound, put NULL into the list */
405 			Assert(isSlice && ai->is_slice);
406 			subexpr = NULL;
407 		}
408 		upperIndexpr = lappend(upperIndexpr, subexpr);
409 	}
410 
411 	/*
412 	 * If doing an array store, coerce the source value to the right type.
413 	 * (This should agree with the coercion done by transformAssignedExpr.)
414 	 */
415 	if (assignFrom != NULL)
416 	{
417 		Oid			typesource = exprType(assignFrom);
418 		Oid			typeneeded = isSlice ? containerType : elementType;
419 		Node	   *newFrom;
420 
421 		newFrom = coerce_to_target_type(pstate,
422 										assignFrom, typesource,
423 										typeneeded, containerTypMod,
424 										COERCION_ASSIGNMENT,
425 										COERCE_IMPLICIT_CAST,
426 										-1);
427 		if (newFrom == NULL)
428 			ereport(ERROR,
429 					(errcode(ERRCODE_DATATYPE_MISMATCH),
430 					 errmsg("array assignment requires type %s"
431 							" but expression is of type %s",
432 							format_type_be(typeneeded),
433 							format_type_be(typesource)),
434 					 errhint("You will need to rewrite or cast the expression."),
435 					 parser_errposition(pstate, exprLocation(assignFrom))));
436 		assignFrom = newFrom;
437 	}
438 
439 	/*
440 	 * Ready to build the SubscriptingRef node.
441 	 */
442 	sbsref = (SubscriptingRef *) makeNode(SubscriptingRef);
443 	if (assignFrom != NULL)
444 		sbsref->refassgnexpr = (Expr *) assignFrom;
445 
446 	sbsref->refcontainertype = containerType;
447 	sbsref->refelemtype = elementType;
448 	sbsref->reftypmod = containerTypMod;
449 	/* refcollid will be set by parse_collate.c */
450 	sbsref->refupperindexpr = upperIndexpr;
451 	sbsref->reflowerindexpr = lowerIndexpr;
452 	sbsref->refexpr = (Expr *) containerBase;
453 	sbsref->refassgnexpr = (Expr *) assignFrom;
454 
455 	return sbsref;
456 }
457 
458 /*
459  * make_const
460  *
461  *	Convert a Value node (as returned by the grammar) to a Const node
462  *	of the "natural" type for the constant.  Note that this routine is
463  *	only used when there is no explicit cast for the constant, so we
464  *	have to guess what type is wanted.
465  *
466  *	For string literals we produce a constant of type UNKNOWN ---- whose
467  *	representation is the same as cstring, but it indicates to later type
468  *	resolution that we're not sure yet what type it should be considered.
469  *	Explicit "NULL" constants are also typed as UNKNOWN.
470  *
471  *	For integers and floats we produce int4, int8, or numeric depending
472  *	on the value of the number.  XXX We should produce int2 as well,
473  *	but additional cleanup is needed before we can do that; there are
474  *	too many examples that fail if we try.
475  */
476 Const *
477 make_const(ParseState *pstate, Value *value, int location)
478 {
479 	Const	   *con;
480 	Datum		val;
481 	int64		val64;
482 	Oid			typeid;
483 	int			typelen;
484 	bool		typebyval;
485 	ParseCallbackState pcbstate;
486 
487 	switch (nodeTag(value))
488 	{
489 		case T_Integer:
490 			val = Int32GetDatum(intVal(value));
491 
492 			typeid = INT4OID;
493 			typelen = sizeof(int32);
494 			typebyval = true;
495 			break;
496 
497 		case T_Float:
498 			/* could be an oversize integer as well as a float ... */
499 			if (scanint8(strVal(value), true, &val64))
500 			{
501 				/*
502 				 * It might actually fit in int32. Probably only INT_MIN can
503 				 * occur, but we'll code the test generally just to be sure.
504 				 */
505 				int32		val32 = (int32) val64;
506 
507 				if (val64 == (int64) val32)
508 				{
509 					val = Int32GetDatum(val32);
510 
511 					typeid = INT4OID;
512 					typelen = sizeof(int32);
513 					typebyval = true;
514 				}
515 				else
516 				{
517 					val = Int64GetDatum(val64);
518 
519 					typeid = INT8OID;
520 					typelen = sizeof(int64);
521 					typebyval = FLOAT8PASSBYVAL;	/* int8 and float8 alike */
522 				}
523 			}
524 			else
525 			{
526 				/* arrange to report location if numeric_in() fails */
527 				setup_parser_errposition_callback(&pcbstate, pstate, location);
528 				val = DirectFunctionCall3(numeric_in,
529 										  CStringGetDatum(strVal(value)),
530 										  ObjectIdGetDatum(InvalidOid),
531 										  Int32GetDatum(-1));
532 				cancel_parser_errposition_callback(&pcbstate);
533 
534 				typeid = NUMERICOID;
535 				typelen = -1;	/* variable len */
536 				typebyval = false;
537 			}
538 			break;
539 
540 		case T_String:
541 
542 			/*
543 			 * We assume here that UNKNOWN's internal representation is the
544 			 * same as CSTRING
545 			 */
546 			val = CStringGetDatum(strVal(value));
547 
548 			typeid = UNKNOWNOID;	/* will be coerced later */
549 			typelen = -2;		/* cstring-style varwidth type */
550 			typebyval = false;
551 			break;
552 
553 		case T_BitString:
554 			/* arrange to report location if bit_in() fails */
555 			setup_parser_errposition_callback(&pcbstate, pstate, location);
556 			val = DirectFunctionCall3(bit_in,
557 									  CStringGetDatum(strVal(value)),
558 									  ObjectIdGetDatum(InvalidOid),
559 									  Int32GetDatum(-1));
560 			cancel_parser_errposition_callback(&pcbstate);
561 			typeid = BITOID;
562 			typelen = -1;
563 			typebyval = false;
564 			break;
565 
566 		case T_Null:
567 			/* return a null const */
568 			con = makeConst(UNKNOWNOID,
569 							-1,
570 							InvalidOid,
571 							-2,
572 							(Datum) 0,
573 							true,
574 							false);
575 			con->location = location;
576 			return con;
577 
578 		default:
579 			elog(ERROR, "unrecognized node type: %d", (int) nodeTag(value));
580 			return NULL;		/* keep compiler quiet */
581 	}
582 
583 	con = makeConst(typeid,
584 					-1,			/* typmod -1 is OK for all cases */
585 					InvalidOid, /* all cases are uncollatable types */
586 					typelen,
587 					val,
588 					false,
589 					typebyval);
590 	con->location = location;
591 
592 	return con;
593 }
594