1 /*-------------------------------------------------------------------------
2  *
3  * createas.c
4  *	  Execution of CREATE TABLE ... AS, a/k/a SELECT INTO.
5  *	  Since CREATE MATERIALIZED VIEW shares syntax and most behaviors,
6  *	  we implement that here, too.
7  *
8  * We implement this by diverting the query's normal output to a
9  * specialized DestReceiver type.
10  *
11  * Formerly, CTAS was implemented as a variant of SELECT, which led
12  * to assorted legacy behaviors that we still try to preserve, notably that
13  * we must return a tuples-processed count in the QueryCompletion.  (We no
14  * longer do that for CTAS ... WITH NO DATA, however.)
15  *
16  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
17  * Portions Copyright (c) 1994, Regents of the University of California
18  *
19  *
20  * IDENTIFICATION
21  *	  src/backend/commands/createas.c
22  *
23  *-------------------------------------------------------------------------
24  */
25 #include "postgres.h"
InfoStreamBuilder(msf::MSFBuilder & Msf,NamedStreamMap & NamedStreams)26 
27 #include "access/heapam.h"
28 #include "access/htup_details.h"
29 #include "access/reloptions.h"
30 #include "access/sysattr.h"
31 #include "access/tableam.h"
32 #include "access/xact.h"
33 #include "access/xlog.h"
34 #include "catalog/namespace.h"
35 #include "catalog/toasting.h"
36 #include "commands/createas.h"
37 #include "commands/matview.h"
38 #include "commands/prepare.h"
39 #include "commands/tablecmds.h"
40 #include "commands/view.h"
41 #include "miscadmin.h"
42 #include "nodes/makefuncs.h"
43 #include "nodes/nodeFuncs.h"
44 #include "parser/parse_clause.h"
45 #include "rewrite/rewriteHandler.h"
46 #include "storage/smgr.h"
47 #include "tcop/tcopprot.h"
48 #include "utils/builtins.h"
49 #include "utils/lsyscache.h"
50 #include "utils/rel.h"
51 #include "utils/rls.h"
52 #include "utils/snapmgr.h"
53 
54 typedef struct
55 {
56 	DestReceiver pub;			/* publicly-known function pointers */
57 	IntoClause *into;			/* target relation specification */
58 	/* These fields are filled by intorel_startup: */
59 	Relation	rel;			/* relation to write to */
60 	ObjectAddress reladdr;		/* address of rel, for ExecCreateTableAs */
61 	CommandId	output_cid;		/* cmin to insert in output tuples */
62 	int			ti_options;		/* table_tuple_insert performance options */
63 	BulkInsertState bistate;	/* bulk insert state */
64 } DR_intorel;
65 
66 /* utility functions for CTAS definition creation */
67 static ObjectAddress create_ctas_internal(List *attrList, IntoClause *into);
68 static ObjectAddress create_ctas_nodata(List *tlist, IntoClause *into);
69 
70 /* DestReceiver routines for collecting data */
71 static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo);
72 static bool intorel_receive(TupleTableSlot *slot, DestReceiver *self);
73 static void intorel_shutdown(DestReceiver *self);
74 static void intorel_destroy(DestReceiver *self);
75 
76 
77 /*
78  * create_ctas_internal
79  *
80  * Internal utility used for the creation of the definition of a relation
81  * created via CREATE TABLE AS or a materialized view.  Caller needs to
82  * provide a list of attributes (ColumnDef nodes).
83  */
84 static ObjectAddress
85 create_ctas_internal(List *attrList, IntoClause *into)
86 {
87 	CreateStmt *create = makeNode(CreateStmt);
88 	bool		is_matview;
89 	char		relkind;
90 	Datum		toast_options;
91 	static char *validnsps[] = HEAP_RELOPT_NAMESPACES;
92 	ObjectAddress intoRelationAddr;
93 
94 	/* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */
95 	is_matview = (into->viewQuery != NULL);
96 	relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION;
97 
98 	/*
99 	 * Create the target relation by faking up a CREATE TABLE parsetree and
100 	 * passing it to DefineRelation.
101 	 */
102 	create->relation = into->rel;
103 	create->tableElts = attrList;
104 	create->inhRelations = NIL;
105 	create->ofTypename = NULL;
106 	create->constraints = NIL;
107 	create->options = into->options;
108 	create->oncommit = into->onCommit;
109 	create->tablespacename = into->tableSpaceName;
110 	create->if_not_exists = false;
111 	create->accessMethod = into->accessMethod;
112 
113 	/*
114 	 * Create the relation.  (This will error out if there's an existing view,
115 	 * so we don't need more code to complain if "replace" is false.)
116 	 */
117 	intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL);
118 
119 	/*
120 	 * If necessary, create a TOAST table for the target table.  Note that
121 	 * NewRelationCreateToastTable ends with CommandCounterIncrement(), so
122 	 * that the TOAST table will be visible for insertion.
123 	 */
124 	CommandCounterIncrement();
125 
126 	/* parse and validate reloptions for the toast table */
127 	toast_options = transformRelOptions((Datum) 0,
128 										create->options,
129 										"toast",
130 										validnsps,
131 										true, false);
132 
133 	(void) heap_reloptions(RELKIND_TOASTVALUE, toast_options, true);
134 
135 	NewRelationCreateToastTable(intoRelationAddr.objectId, toast_options);
136 
137 	/* Create the "view" part of a materialized view. */
138 	if (is_matview)
139 	{
140 		/* StoreViewQuery scribbles on tree, so make a copy */
141 		Query	   *query = (Query *) copyObject(into->viewQuery);
142 
143 		StoreViewQuery(intoRelationAddr.objectId, query, false);
144 		CommandCounterIncrement();
145 	}
146 
147 	return intoRelationAddr;
148 }
149 
150 
151 /*
152  * create_ctas_nodata
153  *
154  * Create CTAS or materialized view when WITH NO DATA is used, starting from
155  * the targetlist of the SELECT or view definition.
156  */
157 static ObjectAddress
158 create_ctas_nodata(List *tlist, IntoClause *into)
159 {
160 	List	   *attrList;
161 	ListCell   *t,
162 			   *lc;
163 
164 	/*
165 	 * Build list of ColumnDefs from non-junk elements of the tlist.  If a
166 	 * column name list was specified in CREATE TABLE AS, override the column
167 	 * names in the query.  (Too few column names are OK, too many are not.)
168 	 */
169 	attrList = NIL;
170 	lc = list_head(into->colNames);
171 	foreach(t, tlist)
172 	{
173 		TargetEntry *tle = (TargetEntry *) lfirst(t);
174 
175 		if (!tle->resjunk)
176 		{
177 			ColumnDef  *col;
178 			char	   *colname;
179 
180 			if (lc)
181 			{
182 				colname = strVal(lfirst(lc));
183 				lc = lnext(into->colNames, lc);
184 			}
185 			else
186 				colname = tle->resname;
187 
188 			col = makeColumnDef(colname,
189 								exprType((Node *) tle->expr),
190 								exprTypmod((Node *) tle->expr),
191 								exprCollation((Node *) tle->expr));
192 
193 			/*
194 			 * It's possible that the column is of a collatable type but the
195 			 * collation could not be resolved, so double-check.  (We must
196 			 * check this here because DefineRelation would adopt the type's
197 			 * default collation rather than complaining.)
198 			 */
199 			if (!OidIsValid(col->collOid) &&
200 				type_is_collatable(col->typeName->typeOid))
201 				ereport(ERROR,
202 						(errcode(ERRCODE_INDETERMINATE_COLLATION),
203 						 errmsg("no collation was derived for column \"%s\" with collatable type %s",
204 								col->colname,
205 								format_type_be(col->typeName->typeOid)),
206 						 errhint("Use the COLLATE clause to set the collation explicitly.")));
207 
208 			attrList = lappend(attrList, col);
209 		}
210 	}
211 
212 	if (lc != NULL)
213 		ereport(ERROR,
214 				(errcode(ERRCODE_SYNTAX_ERROR),
215 				 errmsg("too many column names were specified")));
216 
217 	/* Create the relation definition using the ColumnDef list */
218 	return create_ctas_internal(attrList, into);
219 }
220 
221 
222 /*
223  * ExecCreateTableAs -- execute a CREATE TABLE AS command
224  */
225 ObjectAddress
226 ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt,
227 				  ParamListInfo params, QueryEnvironment *queryEnv,
228 				  QueryCompletion *qc)
229 {
230 	Query	   *query = castNode(Query, stmt->query);
231 	IntoClause *into = stmt->into;
232 	bool		is_matview = (into->viewQuery != NULL);
233 	DestReceiver *dest;
234 	Oid			save_userid = InvalidOid;
235 	int			save_sec_context = 0;
236 	int			save_nestlevel = 0;
237 	ObjectAddress address;
238 	List	   *rewritten;
239 	PlannedStmt *plan;
240 	QueryDesc  *queryDesc;
241 
242 	if (stmt->if_not_exists)
243 	{
244 		Oid			nspid;
245 
246 		nspid = RangeVarGetCreationNamespace(stmt->into->rel);
247 
248 		if (get_relname_relid(stmt->into->rel->relname, nspid))
249 		{
250 			ereport(NOTICE,
251 					(errcode(ERRCODE_DUPLICATE_TABLE),
252 					 errmsg("relation \"%s\" already exists, skipping",
253 							stmt->into->rel->relname)));
254 			return InvalidObjectAddress;
255 		}
256 	}
257 
258 	/*
259 	 * Create the tuple receiver object and insert info it will need
260 	 */
261 	dest = CreateIntoRelDestReceiver(into);
262 
263 	/*
264 	 * The contained Query could be a SELECT, or an EXECUTE utility command.
265 	 * If the latter, we just pass it off to ExecuteQuery.
266 	 */
267 	if (query->commandType == CMD_UTILITY &&
268 		IsA(query->utilityStmt, ExecuteStmt))
269 	{
270 		ExecuteStmt *estmt = castNode(ExecuteStmt, query->utilityStmt);
271 
272 		Assert(!is_matview);	/* excluded by syntax */
273 		ExecuteQuery(pstate, estmt, into, params, dest, qc);
274 
275 		/* get object address that intorel_startup saved for us */
276 		address = ((DR_intorel *) dest)->reladdr;
277 
278 		return address;
279 	}
280 	Assert(query->commandType == CMD_SELECT);
281 
282 	/*
283 	 * For materialized views, lock down security-restricted operations and
284 	 * arrange to make GUC variable changes local to this command.  This is
285 	 * not necessary for security, but this keeps the behavior similar to
286 	 * REFRESH MATERIALIZED VIEW.  Otherwise, one could create a materialized
287 	 * view not possible to refresh.
288 	 */
289 	if (is_matview)
290 	{
291 		GetUserIdAndSecContext(&save_userid, &save_sec_context);
292 		SetUserIdAndSecContext(save_userid,
293 							   save_sec_context | SECURITY_RESTRICTED_OPERATION);
294 		save_nestlevel = NewGUCNestLevel();
295 	}
296 
297 	if (into->skipData)
298 	{
299 		/*
300 		 * If WITH NO DATA was specified, do not go through the rewriter,
301 		 * planner and executor.  Just define the relation using a code path
302 		 * similar to CREATE VIEW.  This avoids dump/restore problems stemming
303 		 * from running the planner before all dependencies are set up.
304 		 */
305 		address = create_ctas_nodata(query->targetList, into);
306 	}
307 	else
308 	{
309 		/*
310 		 * Parse analysis was done already, but we still have to run the rule
311 		 * rewriter.  We do not do AcquireRewriteLocks: we assume the query
312 		 * either came straight from the parser, or suitable locks were
313 		 * acquired by plancache.c.
314 		 *
315 		 * Because the rewriter and planner tend to scribble on the input, we
316 		 * make a preliminary copy of the source querytree.  This prevents
317 		 * problems in the case that CTAS is in a portal or plpgsql function
318 		 * and is executed repeatedly.  (See also the same hack in EXPLAIN and
319 		 * PREPARE.)
320 		 */
321 		rewritten = QueryRewrite(copyObject(query));
322 
323 		/* SELECT should never rewrite to more or less than one SELECT query */
324 		if (list_length(rewritten) != 1)
325 			elog(ERROR, "unexpected rewrite result for %s",
326 				 is_matview ? "CREATE MATERIALIZED VIEW" :
327 				 "CREATE TABLE AS SELECT");
328 		query = linitial_node(Query, rewritten);
329 		Assert(query->commandType == CMD_SELECT);
330 
331 		/* plan the query */
332 		plan = pg_plan_query(query, pstate->p_sourcetext,
333 							 CURSOR_OPT_PARALLEL_OK, params);
334 
335 		/*
336 		 * Use a snapshot with an updated command ID to ensure this query sees
337 		 * results of any previously executed queries.  (This could only
338 		 * matter if the planner executed an allegedly-stable function that
339 		 * changed the database contents, but let's do it anyway to be
340 		 * parallel to the EXPLAIN code path.)
341 		 */
342 		PushCopiedSnapshot(GetActiveSnapshot());
343 		UpdateActiveSnapshotCommandId();
344 
345 		/* Create a QueryDesc, redirecting output to our tuple receiver */
346 		queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext,
347 									GetActiveSnapshot(), InvalidSnapshot,
348 									dest, params, queryEnv, 0);
349 
350 		/* call ExecutorStart to prepare the plan for execution */
351 		ExecutorStart(queryDesc, GetIntoRelEFlags(into));
352 
353 		/* run the plan to completion */
354 		ExecutorRun(queryDesc, ForwardScanDirection, 0L, true);
355 
356 		/* save the rowcount if we're given a qc to fill */
357 		if (qc)
358 			SetQueryCompletion(qc, CMDTAG_SELECT, queryDesc->estate->es_processed);
359 
360 		/* get object address that intorel_startup saved for us */
361 		address = ((DR_intorel *) dest)->reladdr;
362 
363 		/* and clean up */
364 		ExecutorFinish(queryDesc);
365 		ExecutorEnd(queryDesc);
366 
367 		FreeQueryDesc(queryDesc);
368 
369 		PopActiveSnapshot();
370 	}
371 
372 	if (is_matview)
373 	{
374 		/* Roll back any GUC changes */
375 		AtEOXact_GUC(false, save_nestlevel);
376 
377 		/* Restore userid and security context */
378 		SetUserIdAndSecContext(save_userid, save_sec_context);
379 	}
380 
381 	return address;
382 }
383 
384 /*
385  * GetIntoRelEFlags --- compute executor flags needed for CREATE TABLE AS
386  *
387  * This is exported because EXPLAIN and PREPARE need it too.  (Note: those
388  * callers still need to deal explicitly with the skipData flag; since they
389  * use different methods for suppressing execution, it doesn't seem worth
390  * trying to encapsulate that part.)
391  */
392 int
393 GetIntoRelEFlags(IntoClause *intoClause)
394 {
395 	int			flags = 0;
396 
397 	if (intoClause->skipData)
398 		flags |= EXEC_FLAG_WITH_NO_DATA;
399 
400 	return flags;
401 }
402 
403 /*
404  * CreateIntoRelDestReceiver -- create a suitable DestReceiver object
405  *
406  * intoClause will be NULL if called from CreateDestReceiver(), in which
407  * case it has to be provided later.  However, it is convenient to allow
408  * self->into to be filled in immediately for other callers.
409  */
410 DestReceiver *
411 CreateIntoRelDestReceiver(IntoClause *intoClause)
412 {
413 	DR_intorel *self = (DR_intorel *) palloc0(sizeof(DR_intorel));
414 
415 	self->pub.receiveSlot = intorel_receive;
416 	self->pub.rStartup = intorel_startup;
417 	self->pub.rShutdown = intorel_shutdown;
418 	self->pub.rDestroy = intorel_destroy;
419 	self->pub.mydest = DestIntoRel;
420 	self->into = intoClause;
421 	/* other private fields will be set during intorel_startup */
422 
423 	return (DestReceiver *) self;
424 }
425 
426 /*
427  * intorel_startup --- executor startup
428  */
429 static void
430 intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
431 {
432 	DR_intorel *myState = (DR_intorel *) self;
433 	IntoClause *into = myState->into;
434 	bool		is_matview;
435 	char		relkind;
436 	List	   *attrList;
437 	ObjectAddress intoRelationAddr;
438 	Relation	intoRelationDesc;
439 	RangeTblEntry *rte;
440 	ListCell   *lc;
441 	int			attnum;
442 
443 	Assert(into != NULL);		/* else somebody forgot to set it */
444 
445 	/* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */
446 	is_matview = (into->viewQuery != NULL);
447 	relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION;
448 
449 	/*
450 	 * Build column definitions using "pre-cooked" type and collation info. If
451 	 * a column name list was specified in CREATE TABLE AS, override the
452 	 * column names derived from the query.  (Too few column names are OK, too
453 	 * many are not.)
454 	 */
455 	attrList = NIL;
456 	lc = list_head(into->colNames);
457 	for (attnum = 0; attnum < typeinfo->natts; attnum++)
458 	{
459 		Form_pg_attribute attribute = TupleDescAttr(typeinfo, attnum);
460 		ColumnDef  *col;
461 		char	   *colname;
462 
463 		if (lc)
464 		{
465 			colname = strVal(lfirst(lc));
466 			lc = lnext(into->colNames, lc);
467 		}
468 		else
469 			colname = NameStr(attribute->attname);
470 
471 		col = makeColumnDef(colname,
472 							attribute->atttypid,
473 							attribute->atttypmod,
474 							attribute->attcollation);
475 
476 		/*
477 		 * It's possible that the column is of a collatable type but the
478 		 * collation could not be resolved, so double-check.  (We must check
479 		 * this here because DefineRelation would adopt the type's default
480 		 * collation rather than complaining.)
481 		 */
482 		if (!OidIsValid(col->collOid) &&
483 			type_is_collatable(col->typeName->typeOid))
484 			ereport(ERROR,
485 					(errcode(ERRCODE_INDETERMINATE_COLLATION),
486 					 errmsg("no collation was derived for column \"%s\" with collatable type %s",
487 							col->colname,
488 							format_type_be(col->typeName->typeOid)),
489 					 errhint("Use the COLLATE clause to set the collation explicitly.")));
490 
491 		attrList = lappend(attrList, col);
492 	}
493 
494 	if (lc != NULL)
495 		ereport(ERROR,
496 				(errcode(ERRCODE_SYNTAX_ERROR),
497 				 errmsg("too many column names were specified")));
498 
499 	/*
500 	 * Actually create the target table
501 	 */
502 	intoRelationAddr = create_ctas_internal(attrList, into);
503 
504 	/*
505 	 * Finally we can open the target table
506 	 */
507 	intoRelationDesc = table_open(intoRelationAddr.objectId, AccessExclusiveLock);
508 
509 	/*
510 	 * Check INSERT permission on the constructed table.
511 	 *
512 	 * XXX: It would arguably make sense to skip this check if into->skipData
513 	 * is true.
514 	 */
515 	rte = makeNode(RangeTblEntry);
516 	rte->rtekind = RTE_RELATION;
517 	rte->relid = intoRelationAddr.objectId;
518 	rte->relkind = relkind;
519 	rte->rellockmode = RowExclusiveLock;
520 	rte->requiredPerms = ACL_INSERT;
521 
522 	for (attnum = 1; attnum <= intoRelationDesc->rd_att->natts; attnum++)
523 		rte->insertedCols = bms_add_member(rte->insertedCols,
524 										   attnum - FirstLowInvalidHeapAttributeNumber);
525 
526 	ExecCheckRTPerms(list_make1(rte), true);
527 
528 	/*
529 	 * Make sure the constructed table does not have RLS enabled.
530 	 *
531 	 * check_enable_rls() will ereport(ERROR) itself if the user has requested
532 	 * something invalid, and otherwise will return RLS_ENABLED if RLS should
533 	 * be enabled here.  We don't actually support that currently, so throw
534 	 * our own ereport(ERROR) if that happens.
535 	 */
536 	if (check_enable_rls(intoRelationAddr.objectId, InvalidOid, false) == RLS_ENABLED)
537 		ereport(ERROR,
538 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
539 				 errmsg("policies not yet implemented for this command")));
540 
541 	/*
542 	 * Tentatively mark the target as populated, if it's a matview and we're
543 	 * going to fill it; otherwise, no change needed.
544 	 */
545 	if (is_matview && !into->skipData)
546 		SetMatViewPopulatedState(intoRelationDesc, true);
547 
548 	/*
549 	 * Fill private fields of myState for use by later routines
550 	 */
551 	myState->rel = intoRelationDesc;
552 	myState->reladdr = intoRelationAddr;
553 	myState->output_cid = GetCurrentCommandId(true);
554 	myState->ti_options = TABLE_INSERT_SKIP_FSM;
555 	myState->bistate = GetBulkInsertState();
556 
557 	/*
558 	 * Valid smgr_targblock implies something already wrote to the relation.
559 	 * This may be harmless, but this function hasn't planned for it.
560 	 */
561 	Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber);
562 }
563 
564 /*
565  * intorel_receive --- receive one tuple
566  */
567 static bool
568 intorel_receive(TupleTableSlot *slot, DestReceiver *self)
569 {
570 	DR_intorel *myState = (DR_intorel *) self;
571 
572 	/*
573 	 * Note that the input slot might not be of the type of the target
574 	 * relation. That's supported by table_tuple_insert(), but slightly less
575 	 * efficient than inserting with the right slot - but the alternative
576 	 * would be to copy into a slot of the right type, which would not be
577 	 * cheap either. This also doesn't allow accessing per-AM data (say a
578 	 * tuple's xmin), but since we don't do that here...
579 	 */
580 
581 	table_tuple_insert(myState->rel,
582 					   slot,
583 					   myState->output_cid,
584 					   myState->ti_options,
585 					   myState->bistate);
586 
587 	/* We know this is a newly created relation, so there are no indexes */
588 
589 	return true;
590 }
591 
592 /*
593  * intorel_shutdown --- executor end
594  */
595 static void
596 intorel_shutdown(DestReceiver *self)
597 {
598 	DR_intorel *myState = (DR_intorel *) self;
599 
600 	FreeBulkInsertState(myState->bistate);
601 
602 	table_finish_bulk_insert(myState->rel, myState->ti_options);
603 
604 	/* close rel, but keep lock until commit */
605 	table_close(myState->rel, NoLock);
606 	myState->rel = NULL;
607 }
608 
609 /*
610  * intorel_destroy --- release DestReceiver object
611  */
612 static void
613 intorel_destroy(DestReceiver *self)
614 {
615 	pfree(self);
616 }
617