1 /*-------------------------------------------------------------------------
2  *
3  * parse_node.c
4  *	  various routines that make nodes for querytrees
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/parser/parse_node.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/heapam.h"
18 #include "access/htup_details.h"
19 #include "catalog/pg_type.h"
20 #include "mb/pg_wchar.h"
21 #include "nodes/makefuncs.h"
22 #include "nodes/nodeFuncs.h"
23 #include "parser/parsetree.h"
24 #include "parser/parse_coerce.h"
25 #include "parser/parse_expr.h"
26 #include "parser/parse_relation.h"
27 #include "utils/builtins.h"
28 #include "utils/int8.h"
29 #include "utils/lsyscache.h"
30 #include "utils/syscache.h"
31 #include "utils/varbit.h"
32 
33 
34 static void pcb_error_callback(void *arg);
35 
36 
37 /*
38  * make_parsestate
39  *		Allocate and initialize a new ParseState.
40  *
41  * Caller should eventually release the ParseState via free_parsestate().
42  */
43 ParseState *
make_parsestate(ParseState * parentParseState)44 make_parsestate(ParseState *parentParseState)
45 {
46 	ParseState *pstate;
47 
48 	pstate = palloc0(sizeof(ParseState));
49 
50 	pstate->parentParseState = parentParseState;
51 
52 	/* Fill in fields that don't start at null/false/zero */
53 	pstate->p_next_resno = 1;
54 	pstate->p_resolve_unknowns = true;
55 
56 	if (parentParseState)
57 	{
58 		pstate->p_sourcetext = parentParseState->p_sourcetext;
59 		/* all hooks are copied from parent */
60 		pstate->p_pre_columnref_hook = parentParseState->p_pre_columnref_hook;
61 		pstate->p_post_columnref_hook = parentParseState->p_post_columnref_hook;
62 		pstate->p_paramref_hook = parentParseState->p_paramref_hook;
63 		pstate->p_coerce_param_hook = parentParseState->p_coerce_param_hook;
64 		pstate->p_ref_hook_state = parentParseState->p_ref_hook_state;
65 		/* query environment stays in context for the whole parse analysis */
66 		pstate->p_queryEnv = parentParseState->p_queryEnv;
67 	}
68 
69 	return pstate;
70 }
71 
72 /*
73  * free_parsestate
74  *		Release a ParseState and any subsidiary resources.
75  */
76 void
free_parsestate(ParseState * pstate)77 free_parsestate(ParseState *pstate)
78 {
79 	/*
80 	 * Check that we did not produce too many resnos; at the very least we
81 	 * cannot allow more than 2^16, since that would exceed the range of a
82 	 * AttrNumber. It seems safest to use MaxTupleAttributeNumber.
83 	 */
84 	if (pstate->p_next_resno - 1 > MaxTupleAttributeNumber)
85 		ereport(ERROR,
86 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
87 				 errmsg("target lists can have at most %d entries",
88 						MaxTupleAttributeNumber)));
89 
90 	if (pstate->p_target_relation != NULL)
91 		heap_close(pstate->p_target_relation, NoLock);
92 
93 	pfree(pstate);
94 }
95 
96 
97 /*
98  * parser_errposition
99  *		Report a parse-analysis-time cursor position, if possible.
100  *
101  * This is expected to be used within an ereport() call.  The return value
102  * is a dummy (always 0, in fact).
103  *
104  * The locations stored in raw parsetrees are byte offsets into the source
105  * string.  We have to convert them to 1-based character indexes for reporting
106  * to clients.  (We do things this way to avoid unnecessary overhead in the
107  * normal non-error case: computing character indexes would be much more
108  * expensive than storing token offsets.)
109  */
110 int
parser_errposition(ParseState * pstate,int location)111 parser_errposition(ParseState *pstate, int location)
112 {
113 	int			pos;
114 
115 	/* No-op if location was not provided */
116 	if (location < 0)
117 		return 0;
118 	/* Can't do anything if source text is not available */
119 	if (pstate == NULL || pstate->p_sourcetext == NULL)
120 		return 0;
121 	/* Convert offset to character number */
122 	pos = pg_mbstrlen_with_len(pstate->p_sourcetext, location) + 1;
123 	/* And pass it to the ereport mechanism */
124 	return errposition(pos);
125 }
126 
127 
128 /*
129  * setup_parser_errposition_callback
130  *		Arrange for non-parser errors to report an error position
131  *
132  * Sometimes the parser calls functions that aren't part of the parser
133  * subsystem and can't reasonably be passed a ParseState; yet we would
134  * like any errors thrown in those functions to be tagged with a parse
135  * error location.  Use this function to set up an error context stack
136  * entry that will accomplish that.  Usage pattern:
137  *
138  *		declare a local variable "ParseCallbackState pcbstate"
139  *		...
140  *		setup_parser_errposition_callback(&pcbstate, pstate, location);
141  *		call function that might throw error;
142  *		cancel_parser_errposition_callback(&pcbstate);
143  */
144 void
setup_parser_errposition_callback(ParseCallbackState * pcbstate,ParseState * pstate,int location)145 setup_parser_errposition_callback(ParseCallbackState *pcbstate,
146 								  ParseState *pstate, int location)
147 {
148 	/* Setup error traceback support for ereport() */
149 	pcbstate->pstate = pstate;
150 	pcbstate->location = location;
151 	pcbstate->errcallback.callback = pcb_error_callback;
152 	pcbstate->errcallback.arg = (void *) pcbstate;
153 	pcbstate->errcallback.previous = error_context_stack;
154 	error_context_stack = &pcbstate->errcallback;
155 }
156 
157 /*
158  * Cancel a previously-set-up errposition callback.
159  */
160 void
cancel_parser_errposition_callback(ParseCallbackState * pcbstate)161 cancel_parser_errposition_callback(ParseCallbackState *pcbstate)
162 {
163 	/* Pop the error context stack */
164 	error_context_stack = pcbstate->errcallback.previous;
165 }
166 
167 /*
168  * Error context callback for inserting parser error location.
169  *
170  * Note that this will be called for *any* error occurring while the
171  * callback is installed.  We avoid inserting an irrelevant error location
172  * if the error is a query cancel --- are there any other important cases?
173  */
174 static void
pcb_error_callback(void * arg)175 pcb_error_callback(void *arg)
176 {
177 	ParseCallbackState *pcbstate = (ParseCallbackState *) arg;
178 
179 	if (geterrcode() != ERRCODE_QUERY_CANCELED)
180 		(void) parser_errposition(pcbstate->pstate, pcbstate->location);
181 }
182 
183 
184 /*
185  * make_var
186  *		Build a Var node for an attribute identified by RTE and attrno
187  */
188 Var *
make_var(ParseState * pstate,RangeTblEntry * rte,int attrno,int location)189 make_var(ParseState *pstate, RangeTblEntry *rte, int attrno, int location)
190 {
191 	Var		   *result;
192 	int			vnum,
193 				sublevels_up;
194 	Oid			vartypeid;
195 	int32		type_mod;
196 	Oid			varcollid;
197 
198 	vnum = RTERangeTablePosn(pstate, rte, &sublevels_up);
199 	get_rte_attribute_type(rte, attrno, &vartypeid, &type_mod, &varcollid);
200 	result = makeVar(vnum, attrno, vartypeid, type_mod, varcollid, sublevels_up);
201 	result->location = location;
202 	return result;
203 }
204 
205 /*
206  * transformArrayType()
207  *		Identify the types involved in a subscripting operation
208  *
209  * On entry, arrayType/arrayTypmod identify the type of the input value
210  * to be subscripted (which could be a domain type).  These are modified
211  * if necessary to identify the actual array type and typmod, and the
212  * array's element type is returned.  An error is thrown if the input isn't
213  * an array type.
214  */
215 Oid
transformArrayType(Oid * arrayType,int32 * arrayTypmod)216 transformArrayType(Oid *arrayType, int32 *arrayTypmod)
217 {
218 	Oid			origArrayType = *arrayType;
219 	Oid			elementType;
220 	HeapTuple	type_tuple_array;
221 	Form_pg_type type_struct_array;
222 
223 	/*
224 	 * If the input is a domain, smash to base type, and extract the actual
225 	 * typmod to be applied to the base type.  Subscripting a domain is an
226 	 * operation that necessarily works on the base array type, not the domain
227 	 * itself.  (Note that we provide no method whereby the creator of a
228 	 * domain over an array type could hide its ability to be subscripted.)
229 	 */
230 	*arrayType = getBaseTypeAndTypmod(*arrayType, arrayTypmod);
231 
232 	/*
233 	 * We treat int2vector and oidvector as though they were domains over
234 	 * int2[] and oid[].  This is needed because array slicing could create an
235 	 * array that doesn't satisfy the dimensionality constraints of the
236 	 * xxxvector type; so we want the result of a slice operation to be
237 	 * considered to be of the more general type.
238 	 */
239 	if (*arrayType == INT2VECTOROID)
240 		*arrayType = INT2ARRAYOID;
241 	else if (*arrayType == OIDVECTOROID)
242 		*arrayType = OIDARRAYOID;
243 
244 	/* Get the type tuple for the array */
245 	type_tuple_array = SearchSysCache1(TYPEOID, ObjectIdGetDatum(*arrayType));
246 	if (!HeapTupleIsValid(type_tuple_array))
247 		elog(ERROR, "cache lookup failed for type %u", *arrayType);
248 	type_struct_array = (Form_pg_type) GETSTRUCT(type_tuple_array);
249 
250 	/* needn't check typisdefined since this will fail anyway */
251 
252 	elementType = type_struct_array->typelem;
253 	if (elementType == InvalidOid)
254 		ereport(ERROR,
255 				(errcode(ERRCODE_DATATYPE_MISMATCH),
256 				 errmsg("cannot subscript type %s because it is not an array",
257 						format_type_be(origArrayType))));
258 
259 	ReleaseSysCache(type_tuple_array);
260 
261 	return elementType;
262 }
263 
264 /*
265  * transformArraySubscripts()
266  *		Transform array subscripting.  This is used for both
267  *		array fetch and array assignment.
268  *
269  * In an array fetch, we are given a source array value and we produce an
270  * expression that represents the result of extracting a single array element
271  * or an array slice.
272  *
273  * In an array assignment, we are given a destination array value plus a
274  * source value that is to be assigned to a single element or a slice of
275  * that array.  We produce an expression that represents the new array value
276  * with the source data inserted into the right part of the array.
277  *
278  * For both cases, if the source array is of a domain-over-array type,
279  * the result is of the base array type or its element type; essentially,
280  * we must fold a domain to its base type before applying subscripting.
281  * (Note that int2vector and oidvector are treated as domains here.)
282  *
283  * pstate		Parse state
284  * arrayBase	Already-transformed expression for the array as a whole
285  * arrayType	OID of array's datatype (should match type of arrayBase,
286  *				or be the base type of arrayBase's domain type)
287  * elementType	OID of array's element type (fetch with transformArrayType,
288  *				or pass InvalidOid to do it here)
289  * arrayTypMod	typmod for the array (which is also typmod for the elements)
290  * indirection	Untransformed list of subscripts (must not be NIL)
291  * assignFrom	NULL for array fetch, else transformed expression for source.
292  */
293 ArrayRef *
transformArraySubscripts(ParseState * pstate,Node * arrayBase,Oid arrayType,Oid elementType,int32 arrayTypMod,List * indirection,Node * assignFrom)294 transformArraySubscripts(ParseState *pstate,
295 						 Node *arrayBase,
296 						 Oid arrayType,
297 						 Oid elementType,
298 						 int32 arrayTypMod,
299 						 List *indirection,
300 						 Node *assignFrom)
301 {
302 	bool		isSlice = false;
303 	List	   *upperIndexpr = NIL;
304 	List	   *lowerIndexpr = NIL;
305 	ListCell   *idx;
306 	ArrayRef   *aref;
307 
308 	/*
309 	 * Caller may or may not have bothered to determine elementType.  Note
310 	 * that if the caller did do so, arrayType/arrayTypMod must be as modified
311 	 * by transformArrayType, ie, smash domain to base type.
312 	 */
313 	if (!OidIsValid(elementType))
314 		elementType = transformArrayType(&arrayType, &arrayTypMod);
315 
316 	/*
317 	 * A list containing only simple subscripts refers to a single array
318 	 * element.  If any of the items are slice specifiers (lower:upper), then
319 	 * the subscript expression means an array slice operation.  In this case,
320 	 * we convert any non-slice items to slices by treating the single
321 	 * subscript as the upper bound and supplying an assumed lower bound of 1.
322 	 * We have to prescan the list to see if there are any slice items.
323 	 */
324 	foreach(idx, indirection)
325 	{
326 		A_Indices  *ai = (A_Indices *) lfirst(idx);
327 
328 		if (ai->is_slice)
329 		{
330 			isSlice = true;
331 			break;
332 		}
333 	}
334 
335 	/*
336 	 * Transform the subscript expressions.
337 	 */
338 	foreach(idx, indirection)
339 	{
340 		A_Indices  *ai = lfirst_node(A_Indices, idx);
341 		Node	   *subexpr;
342 
343 		if (isSlice)
344 		{
345 			if (ai->lidx)
346 			{
347 				subexpr = transformExpr(pstate, ai->lidx, pstate->p_expr_kind);
348 				/* If it's not int4 already, try to coerce */
349 				subexpr = coerce_to_target_type(pstate,
350 												subexpr, exprType(subexpr),
351 												INT4OID, -1,
352 												COERCION_ASSIGNMENT,
353 												COERCE_IMPLICIT_CAST,
354 												-1);
355 				if (subexpr == NULL)
356 					ereport(ERROR,
357 							(errcode(ERRCODE_DATATYPE_MISMATCH),
358 							 errmsg("array subscript must have type integer"),
359 							 parser_errposition(pstate, exprLocation(ai->lidx))));
360 			}
361 			else if (!ai->is_slice)
362 			{
363 				/* Make a constant 1 */
364 				subexpr = (Node *) makeConst(INT4OID,
365 											 -1,
366 											 InvalidOid,
367 											 sizeof(int32),
368 											 Int32GetDatum(1),
369 											 false,
370 											 true); /* pass by value */
371 			}
372 			else
373 			{
374 				/* Slice with omitted lower bound, put NULL into the list */
375 				subexpr = NULL;
376 			}
377 			lowerIndexpr = lappend(lowerIndexpr, subexpr);
378 		}
379 		else
380 			Assert(ai->lidx == NULL && !ai->is_slice);
381 
382 		if (ai->uidx)
383 		{
384 			subexpr = transformExpr(pstate, ai->uidx, pstate->p_expr_kind);
385 			/* If it's not int4 already, try to coerce */
386 			subexpr = coerce_to_target_type(pstate,
387 											subexpr, exprType(subexpr),
388 											INT4OID, -1,
389 											COERCION_ASSIGNMENT,
390 											COERCE_IMPLICIT_CAST,
391 											-1);
392 			if (subexpr == NULL)
393 				ereport(ERROR,
394 						(errcode(ERRCODE_DATATYPE_MISMATCH),
395 						 errmsg("array subscript must have type integer"),
396 						 parser_errposition(pstate, exprLocation(ai->uidx))));
397 		}
398 		else
399 		{
400 			/* Slice with omitted upper bound, put NULL into the list */
401 			Assert(isSlice && ai->is_slice);
402 			subexpr = NULL;
403 		}
404 		upperIndexpr = lappend(upperIndexpr, subexpr);
405 	}
406 
407 	/*
408 	 * If doing an array store, coerce the source value to the right type.
409 	 * (This should agree with the coercion done by transformAssignedExpr.)
410 	 */
411 	if (assignFrom != NULL)
412 	{
413 		Oid			typesource = exprType(assignFrom);
414 		Oid			typeneeded = isSlice ? arrayType : elementType;
415 		Node	   *newFrom;
416 
417 		newFrom = coerce_to_target_type(pstate,
418 										assignFrom, typesource,
419 										typeneeded, arrayTypMod,
420 										COERCION_ASSIGNMENT,
421 										COERCE_IMPLICIT_CAST,
422 										-1);
423 		if (newFrom == NULL)
424 			ereport(ERROR,
425 					(errcode(ERRCODE_DATATYPE_MISMATCH),
426 					 errmsg("array assignment requires type %s"
427 							" but expression is of type %s",
428 							format_type_be(typeneeded),
429 							format_type_be(typesource)),
430 					 errhint("You will need to rewrite or cast the expression."),
431 					 parser_errposition(pstate, exprLocation(assignFrom))));
432 		assignFrom = newFrom;
433 	}
434 
435 	/*
436 	 * Ready to build the ArrayRef node.
437 	 */
438 	aref = makeNode(ArrayRef);
439 	aref->refarraytype = arrayType;
440 	aref->refelemtype = elementType;
441 	aref->reftypmod = arrayTypMod;
442 	/* refcollid will be set by parse_collate.c */
443 	aref->refupperindexpr = upperIndexpr;
444 	aref->reflowerindexpr = lowerIndexpr;
445 	aref->refexpr = (Expr *) arrayBase;
446 	aref->refassgnexpr = (Expr *) assignFrom;
447 
448 	return aref;
449 }
450 
451 /*
452  * make_const
453  *
454  *	Convert a Value node (as returned by the grammar) to a Const node
455  *	of the "natural" type for the constant.  Note that this routine is
456  *	only used when there is no explicit cast for the constant, so we
457  *	have to guess what type is wanted.
458  *
459  *	For string literals we produce a constant of type UNKNOWN ---- whose
460  *	representation is the same as cstring, but it indicates to later type
461  *	resolution that we're not sure yet what type it should be considered.
462  *	Explicit "NULL" constants are also typed as UNKNOWN.
463  *
464  *	For integers and floats we produce int4, int8, or numeric depending
465  *	on the value of the number.  XXX We should produce int2 as well,
466  *	but additional cleanup is needed before we can do that; there are
467  *	too many examples that fail if we try.
468  */
469 Const *
make_const(ParseState * pstate,Value * value,int location)470 make_const(ParseState *pstate, Value *value, int location)
471 {
472 	Const	   *con;
473 	Datum		val;
474 	int64		val64;
475 	Oid			typeid;
476 	int			typelen;
477 	bool		typebyval;
478 	ParseCallbackState pcbstate;
479 
480 	switch (nodeTag(value))
481 	{
482 		case T_Integer:
483 			val = Int32GetDatum(intVal(value));
484 
485 			typeid = INT4OID;
486 			typelen = sizeof(int32);
487 			typebyval = true;
488 			break;
489 
490 		case T_Float:
491 			/* could be an oversize integer as well as a float ... */
492 			if (scanint8(strVal(value), true, &val64))
493 			{
494 				/*
495 				 * It might actually fit in int32. Probably only INT_MIN can
496 				 * occur, but we'll code the test generally just to be sure.
497 				 */
498 				int32		val32 = (int32) val64;
499 
500 				if (val64 == (int64) val32)
501 				{
502 					val = Int32GetDatum(val32);
503 
504 					typeid = INT4OID;
505 					typelen = sizeof(int32);
506 					typebyval = true;
507 				}
508 				else
509 				{
510 					val = Int64GetDatum(val64);
511 
512 					typeid = INT8OID;
513 					typelen = sizeof(int64);
514 					typebyval = FLOAT8PASSBYVAL;	/* int8 and float8 alike */
515 				}
516 			}
517 			else
518 			{
519 				/* arrange to report location if numeric_in() fails */
520 				setup_parser_errposition_callback(&pcbstate, pstate, location);
521 				val = DirectFunctionCall3(numeric_in,
522 										  CStringGetDatum(strVal(value)),
523 										  ObjectIdGetDatum(InvalidOid),
524 										  Int32GetDatum(-1));
525 				cancel_parser_errposition_callback(&pcbstate);
526 
527 				typeid = NUMERICOID;
528 				typelen = -1;	/* variable len */
529 				typebyval = false;
530 			}
531 			break;
532 
533 		case T_String:
534 
535 			/*
536 			 * We assume here that UNKNOWN's internal representation is the
537 			 * same as CSTRING
538 			 */
539 			val = CStringGetDatum(strVal(value));
540 
541 			typeid = UNKNOWNOID;	/* will be coerced later */
542 			typelen = -2;		/* cstring-style varwidth type */
543 			typebyval = false;
544 			break;
545 
546 		case T_BitString:
547 			/* arrange to report location if bit_in() fails */
548 			setup_parser_errposition_callback(&pcbstate, pstate, location);
549 			val = DirectFunctionCall3(bit_in,
550 									  CStringGetDatum(strVal(value)),
551 									  ObjectIdGetDatum(InvalidOid),
552 									  Int32GetDatum(-1));
553 			cancel_parser_errposition_callback(&pcbstate);
554 			typeid = BITOID;
555 			typelen = -1;
556 			typebyval = false;
557 			break;
558 
559 		case T_Null:
560 			/* return a null const */
561 			con = makeConst(UNKNOWNOID,
562 							-1,
563 							InvalidOid,
564 							-2,
565 							(Datum) 0,
566 							true,
567 							false);
568 			con->location = location;
569 			return con;
570 
571 		default:
572 			elog(ERROR, "unrecognized node type: %d", (int) nodeTag(value));
573 			return NULL;		/* keep compiler quiet */
574 	}
575 
576 	con = makeConst(typeid,
577 					-1,			/* typmod -1 is OK for all cases */
578 					InvalidOid, /* all cases are uncollatable types */
579 					typelen,
580 					val,
581 					false,
582 					typebyval);
583 	con->location = location;
584 
585 	return con;
586 }
587