1 /*-------------------------------------------------------------------------
2  *
3  * createas.c
4  *	  Execution of CREATE TABLE ... AS, a/k/a SELECT INTO.
5  *	  Since CREATE MATERIALIZED VIEW shares syntax and most behaviors,
6  *	  we implement that here, too.
7  *
8  * We implement this by diverting the query's normal output to a
9  * specialized DestReceiver type.
10  *
11  * Formerly, CTAS was implemented as a variant of SELECT, which led
12  * to assorted legacy behaviors that we still try to preserve, notably that
13  * we must return a tuples-processed count in the completionTag.  (We no
14  * longer do that for CTAS ... WITH NO DATA, however.)
15  *
16  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
17  * Portions Copyright (c) 1994, Regents of the University of California
18  *
19  *
20  * IDENTIFICATION
21  *	  src/backend/commands/createas.c
22  *
23  *-------------------------------------------------------------------------
24  */
25 #include "postgres.h"
26 
27 #include "access/reloptions.h"
28 #include "access/htup_details.h"
29 #include "access/sysattr.h"
30 #include "access/xact.h"
31 #include "access/xlog.h"
32 #include "catalog/namespace.h"
33 #include "catalog/toasting.h"
34 #include "commands/createas.h"
35 #include "commands/matview.h"
36 #include "commands/prepare.h"
37 #include "commands/tablecmds.h"
38 #include "commands/view.h"
39 #include "miscadmin.h"
40 #include "nodes/makefuncs.h"
41 #include "nodes/nodeFuncs.h"
42 #include "parser/parse_clause.h"
43 #include "rewrite/rewriteHandler.h"
44 #include "storage/smgr.h"
45 #include "tcop/tcopprot.h"
46 #include "utils/builtins.h"
47 #include "utils/lsyscache.h"
48 #include "utils/rel.h"
49 #include "utils/rls.h"
50 #include "utils/snapmgr.h"
51 
52 
53 typedef struct
54 {
55 	DestReceiver pub;			/* publicly-known function pointers */
56 	IntoClause *into;			/* target relation specification */
57 	/* These fields are filled by intorel_startup: */
58 	Relation	rel;			/* relation to write to */
59 	ObjectAddress reladdr;		/* address of rel, for ExecCreateTableAs */
60 	CommandId	output_cid;		/* cmin to insert in output tuples */
61 	int			hi_options;		/* heap_insert performance options */
62 	BulkInsertState bistate;	/* bulk insert state */
63 } DR_intorel;
64 
65 /* utility functions for CTAS definition creation */
66 static ObjectAddress create_ctas_internal(List *attrList, IntoClause *into);
67 static ObjectAddress create_ctas_nodata(List *tlist, IntoClause *into);
68 
69 /* DestReceiver routines for collecting data */
70 static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo);
71 static bool intorel_receive(TupleTableSlot *slot, DestReceiver *self);
72 static void intorel_shutdown(DestReceiver *self);
73 static void intorel_destroy(DestReceiver *self);
74 
75 
76 /*
77  * create_ctas_internal
78  *
79  * Internal utility used for the creation of the definition of a relation
80  * created via CREATE TABLE AS or a materialized view.  Caller needs to
81  * provide a list of attributes (ColumnDef nodes).
82  */
83 static ObjectAddress
84 create_ctas_internal(List *attrList, IntoClause *into)
85 {
86 	CreateStmt *create = makeNode(CreateStmt);
87 	bool		is_matview;
88 	char		relkind;
89 	Datum		toast_options;
90 	static char *validnsps[] = HEAP_RELOPT_NAMESPACES;
91 	ObjectAddress intoRelationAddr;
92 
93 	/* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */
94 	is_matview = (into->viewQuery != NULL);
95 	relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION;
96 
97 	/*
98 	 * Create the target relation by faking up a CREATE TABLE parsetree and
99 	 * passing it to DefineRelation.
100 	 */
101 	create->relation = into->rel;
102 	create->tableElts = attrList;
103 	create->inhRelations = NIL;
104 	create->ofTypename = NULL;
105 	create->constraints = NIL;
106 	create->options = into->options;
107 	create->oncommit = into->onCommit;
108 	create->tablespacename = into->tableSpaceName;
109 	create->if_not_exists = false;
110 
111 	/*
112 	 * Create the relation.  (This will error out if there's an existing view,
113 	 * so we don't need more code to complain if "replace" is false.)
114 	 */
115 	intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL);
116 
117 	/*
118 	 * If necessary, create a TOAST table for the target table.  Note that
119 	 * NewRelationCreateToastTable ends with CommandCounterIncrement(), so
120 	 * that the TOAST table will be visible for insertion.
121 	 */
122 	CommandCounterIncrement();
123 
124 	/* parse and validate reloptions for the toast table */
125 	toast_options = transformRelOptions((Datum) 0,
126 										create->options,
127 										"toast",
128 										validnsps,
129 										true, false);
130 
131 	(void) heap_reloptions(RELKIND_TOASTVALUE, toast_options, true);
132 
133 	NewRelationCreateToastTable(intoRelationAddr.objectId, toast_options);
134 
135 	/* Create the "view" part of a materialized view. */
136 	if (is_matview)
137 	{
138 		/* StoreViewQuery scribbles on tree, so make a copy */
139 		Query	   *query = (Query *) copyObject(into->viewQuery);
140 
141 		StoreViewQuery(intoRelationAddr.objectId, query, false);
142 		CommandCounterIncrement();
143 	}
144 
145 	return intoRelationAddr;
146 }
147 
148 
149 /*
150  * create_ctas_nodata
151  *
152  * Create CTAS or materialized view when WITH NO DATA is used, starting from
153  * the targetlist of the SELECT or view definition.
154  */
155 static ObjectAddress
156 create_ctas_nodata(List *tlist, IntoClause *into)
157 {
158 	List	   *attrList;
159 	ListCell   *t,
160 			   *lc;
161 
162 	/*
163 	 * Build list of ColumnDefs from non-junk elements of the tlist.  If a
164 	 * column name list was specified in CREATE TABLE AS, override the column
165 	 * names in the query.  (Too few column names are OK, too many are not.)
166 	 */
167 	attrList = NIL;
168 	lc = list_head(into->colNames);
169 	foreach(t, tlist)
170 	{
171 		TargetEntry *tle = (TargetEntry *) lfirst(t);
172 
173 		if (!tle->resjunk)
174 		{
175 			ColumnDef  *col;
176 			char	   *colname;
177 
178 			if (lc)
179 			{
180 				colname = strVal(lfirst(lc));
181 				lc = lnext(lc);
182 			}
183 			else
184 				colname = tle->resname;
185 
186 			col = makeColumnDef(colname,
187 								exprType((Node *) tle->expr),
188 								exprTypmod((Node *) tle->expr),
189 								exprCollation((Node *) tle->expr));
190 
191 			/*
192 			 * It's possible that the column is of a collatable type but the
193 			 * collation could not be resolved, so double-check.  (We must
194 			 * check this here because DefineRelation would adopt the type's
195 			 * default collation rather than complaining.)
196 			 */
197 			if (!OidIsValid(col->collOid) &&
198 				type_is_collatable(col->typeName->typeOid))
199 				ereport(ERROR,
200 						(errcode(ERRCODE_INDETERMINATE_COLLATION),
201 						 errmsg("no collation was derived for column \"%s\" with collatable type %s",
202 								col->colname,
203 								format_type_be(col->typeName->typeOid)),
204 						 errhint("Use the COLLATE clause to set the collation explicitly.")));
205 
206 			attrList = lappend(attrList, col);
207 		}
208 	}
209 
210 	if (lc != NULL)
211 		ereport(ERROR,
212 				(errcode(ERRCODE_SYNTAX_ERROR),
213 				 errmsg("too many column names were specified")));
214 
215 	/* Create the relation definition using the ColumnDef list */
216 	return create_ctas_internal(attrList, into);
217 }
218 
219 
220 /*
221  * ExecCreateTableAs -- execute a CREATE TABLE AS command
222  */
223 ObjectAddress
224 ExecCreateTableAs(CreateTableAsStmt *stmt, const char *queryString,
225 				  ParamListInfo params, QueryEnvironment *queryEnv,
226 				  char *completionTag)
227 {
228 	Query	   *query = castNode(Query, stmt->query);
229 	IntoClause *into = stmt->into;
230 	bool		is_matview = (into->viewQuery != NULL);
231 	DestReceiver *dest;
232 	Oid			save_userid = InvalidOid;
233 	int			save_sec_context = 0;
234 	int			save_nestlevel = 0;
235 	ObjectAddress address;
236 	List	   *rewritten;
237 	PlannedStmt *plan;
238 	QueryDesc  *queryDesc;
239 
240 	if (stmt->if_not_exists)
241 	{
242 		Oid			nspid;
243 
244 		nspid = RangeVarGetCreationNamespace(stmt->into->rel);
245 
246 		if (get_relname_relid(stmt->into->rel->relname, nspid))
247 		{
248 			ereport(NOTICE,
249 					(errcode(ERRCODE_DUPLICATE_TABLE),
250 					 errmsg("relation \"%s\" already exists, skipping",
251 							stmt->into->rel->relname)));
252 			return InvalidObjectAddress;
253 		}
254 	}
255 
256 	/*
257 	 * Create the tuple receiver object and insert info it will need
258 	 */
259 	dest = CreateIntoRelDestReceiver(into);
260 
261 	/*
262 	 * The contained Query could be a SELECT, or an EXECUTE utility command.
263 	 * If the latter, we just pass it off to ExecuteQuery.
264 	 */
265 	if (query->commandType == CMD_UTILITY &&
266 		IsA(query->utilityStmt, ExecuteStmt))
267 	{
268 		ExecuteStmt *estmt = castNode(ExecuteStmt, query->utilityStmt);
269 
270 		Assert(!is_matview);	/* excluded by syntax */
271 		ExecuteQuery(estmt, into, queryString, params, dest, completionTag);
272 
273 		/* get object address that intorel_startup saved for us */
274 		address = ((DR_intorel *) dest)->reladdr;
275 
276 		return address;
277 	}
278 	Assert(query->commandType == CMD_SELECT);
279 
280 	/*
281 	 * For materialized views, lock down security-restricted operations and
282 	 * arrange to make GUC variable changes local to this command.  This is
283 	 * not necessary for security, but this keeps the behavior similar to
284 	 * REFRESH MATERIALIZED VIEW.  Otherwise, one could create a materialized
285 	 * view not possible to refresh.
286 	 */
287 	if (is_matview)
288 	{
289 		GetUserIdAndSecContext(&save_userid, &save_sec_context);
290 		SetUserIdAndSecContext(save_userid,
291 							   save_sec_context | SECURITY_RESTRICTED_OPERATION);
292 		save_nestlevel = NewGUCNestLevel();
293 	}
294 
295 	if (into->skipData)
296 	{
297 		/*
298 		 * If WITH NO DATA was specified, do not go through the rewriter,
299 		 * planner and executor.  Just define the relation using a code path
300 		 * similar to CREATE VIEW.  This avoids dump/restore problems stemming
301 		 * from running the planner before all dependencies are set up.
302 		 */
303 		address = create_ctas_nodata(query->targetList, into);
304 	}
305 	else
306 	{
307 		/*
308 		 * Parse analysis was done already, but we still have to run the rule
309 		 * rewriter.  We do not do AcquireRewriteLocks: we assume the query
310 		 * either came straight from the parser, or suitable locks were
311 		 * acquired by plancache.c.
312 		 *
313 		 * Because the rewriter and planner tend to scribble on the input, we
314 		 * make a preliminary copy of the source querytree.  This prevents
315 		 * problems in the case that CTAS is in a portal or plpgsql function
316 		 * and is executed repeatedly.  (See also the same hack in EXPLAIN and
317 		 * PREPARE.)
318 		 */
319 		rewritten = QueryRewrite(copyObject(query));
320 
321 		/* SELECT should never rewrite to more or less than one SELECT query */
322 		if (list_length(rewritten) != 1)
323 			elog(ERROR, "unexpected rewrite result for %s",
324 				 is_matview ? "CREATE MATERIALIZED VIEW" :
325 				 "CREATE TABLE AS SELECT");
326 		query = linitial_node(Query, rewritten);
327 		Assert(query->commandType == CMD_SELECT);
328 
329 		/* plan the query */
330 		plan = pg_plan_query(query, CURSOR_OPT_PARALLEL_OK, params);
331 
332 		/*
333 		 * Use a snapshot with an updated command ID to ensure this query sees
334 		 * results of any previously executed queries.  (This could only
335 		 * matter if the planner executed an allegedly-stable function that
336 		 * changed the database contents, but let's do it anyway to be
337 		 * parallel to the EXPLAIN code path.)
338 		 */
339 		PushCopiedSnapshot(GetActiveSnapshot());
340 		UpdateActiveSnapshotCommandId();
341 
342 		/* Create a QueryDesc, redirecting output to our tuple receiver */
343 		queryDesc = CreateQueryDesc(plan, queryString,
344 									GetActiveSnapshot(), InvalidSnapshot,
345 									dest, params, queryEnv, 0);
346 
347 		/* call ExecutorStart to prepare the plan for execution */
348 		ExecutorStart(queryDesc, GetIntoRelEFlags(into));
349 
350 		/* run the plan to completion */
351 		ExecutorRun(queryDesc, ForwardScanDirection, 0L, true);
352 
353 		/* save the rowcount if we're given a completionTag to fill */
354 		if (completionTag)
355 			snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
356 					 "SELECT " UINT64_FORMAT,
357 					 queryDesc->estate->es_processed);
358 
359 		/* get object address that intorel_startup saved for us */
360 		address = ((DR_intorel *) dest)->reladdr;
361 
362 		/* and clean up */
363 		ExecutorFinish(queryDesc);
364 		ExecutorEnd(queryDesc);
365 
366 		FreeQueryDesc(queryDesc);
367 
368 		PopActiveSnapshot();
369 	}
370 
371 	if (is_matview)
372 	{
373 		/* Roll back any GUC changes */
374 		AtEOXact_GUC(false, save_nestlevel);
375 
376 		/* Restore userid and security context */
377 		SetUserIdAndSecContext(save_userid, save_sec_context);
378 	}
379 
380 	return address;
381 }
382 
383 /*
384  * GetIntoRelEFlags --- compute executor flags needed for CREATE TABLE AS
385  *
386  * This is exported because EXPLAIN and PREPARE need it too.  (Note: those
387  * callers still need to deal explicitly with the skipData flag; since they
388  * use different methods for suppressing execution, it doesn't seem worth
389  * trying to encapsulate that part.)
390  */
391 int
392 GetIntoRelEFlags(IntoClause *intoClause)
393 {
394 	int			flags;
395 
396 	/*
397 	 * We need to tell the executor whether it has to produce OIDs or not,
398 	 * because it doesn't have enough information to do so itself (since we
399 	 * can't build the target relation until after ExecutorStart).
400 	 *
401 	 * Disallow the OIDS option for materialized views.
402 	 */
403 	if (interpretOidsOption(intoClause->options,
404 							(intoClause->viewQuery == NULL)))
405 		flags = EXEC_FLAG_WITH_OIDS;
406 	else
407 		flags = EXEC_FLAG_WITHOUT_OIDS;
408 
409 	if (intoClause->skipData)
410 		flags |= EXEC_FLAG_WITH_NO_DATA;
411 
412 	return flags;
413 }
414 
415 /*
416  * CreateIntoRelDestReceiver -- create a suitable DestReceiver object
417  *
418  * intoClause will be NULL if called from CreateDestReceiver(), in which
419  * case it has to be provided later.  However, it is convenient to allow
420  * self->into to be filled in immediately for other callers.
421  */
422 DestReceiver *
423 CreateIntoRelDestReceiver(IntoClause *intoClause)
424 {
425 	DR_intorel *self = (DR_intorel *) palloc0(sizeof(DR_intorel));
426 
427 	self->pub.receiveSlot = intorel_receive;
428 	self->pub.rStartup = intorel_startup;
429 	self->pub.rShutdown = intorel_shutdown;
430 	self->pub.rDestroy = intorel_destroy;
431 	self->pub.mydest = DestIntoRel;
432 	self->into = intoClause;
433 	/* other private fields will be set during intorel_startup */
434 
435 	return (DestReceiver *) self;
436 }
437 
438 /*
439  * intorel_startup --- executor startup
440  */
441 static void
442 intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
443 {
444 	DR_intorel *myState = (DR_intorel *) self;
445 	IntoClause *into = myState->into;
446 	bool		is_matview;
447 	char		relkind;
448 	List	   *attrList;
449 	ObjectAddress intoRelationAddr;
450 	Relation	intoRelationDesc;
451 	RangeTblEntry *rte;
452 	ListCell   *lc;
453 	int			attnum;
454 
455 	Assert(into != NULL);		/* else somebody forgot to set it */
456 
457 	/* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */
458 	is_matview = (into->viewQuery != NULL);
459 	relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION;
460 
461 	/*
462 	 * Build column definitions using "pre-cooked" type and collation info. If
463 	 * a column name list was specified in CREATE TABLE AS, override the
464 	 * column names derived from the query.  (Too few column names are OK, too
465 	 * many are not.)
466 	 */
467 	attrList = NIL;
468 	lc = list_head(into->colNames);
469 	for (attnum = 0; attnum < typeinfo->natts; attnum++)
470 	{
471 		Form_pg_attribute attribute = TupleDescAttr(typeinfo, attnum);
472 		ColumnDef  *col;
473 		char	   *colname;
474 
475 		if (lc)
476 		{
477 			colname = strVal(lfirst(lc));
478 			lc = lnext(lc);
479 		}
480 		else
481 			colname = NameStr(attribute->attname);
482 
483 		col = makeColumnDef(colname,
484 							attribute->atttypid,
485 							attribute->atttypmod,
486 							attribute->attcollation);
487 
488 		/*
489 		 * It's possible that the column is of a collatable type but the
490 		 * collation could not be resolved, so double-check.  (We must check
491 		 * this here because DefineRelation would adopt the type's default
492 		 * collation rather than complaining.)
493 		 */
494 		if (!OidIsValid(col->collOid) &&
495 			type_is_collatable(col->typeName->typeOid))
496 			ereport(ERROR,
497 					(errcode(ERRCODE_INDETERMINATE_COLLATION),
498 					 errmsg("no collation was derived for column \"%s\" with collatable type %s",
499 							col->colname,
500 							format_type_be(col->typeName->typeOid)),
501 					 errhint("Use the COLLATE clause to set the collation explicitly.")));
502 
503 		attrList = lappend(attrList, col);
504 	}
505 
506 	if (lc != NULL)
507 		ereport(ERROR,
508 				(errcode(ERRCODE_SYNTAX_ERROR),
509 				 errmsg("too many column names were specified")));
510 
511 	/*
512 	 * Actually create the target table
513 	 */
514 	intoRelationAddr = create_ctas_internal(attrList, into);
515 
516 	/*
517 	 * Finally we can open the target table
518 	 */
519 	intoRelationDesc = heap_open(intoRelationAddr.objectId, AccessExclusiveLock);
520 
521 	/*
522 	 * Check INSERT permission on the constructed table.
523 	 *
524 	 * XXX: It would arguably make sense to skip this check if into->skipData
525 	 * is true.
526 	 */
527 	rte = makeNode(RangeTblEntry);
528 	rte->rtekind = RTE_RELATION;
529 	rte->relid = intoRelationAddr.objectId;
530 	rte->relkind = relkind;
531 	rte->requiredPerms = ACL_INSERT;
532 
533 	for (attnum = 1; attnum <= intoRelationDesc->rd_att->natts; attnum++)
534 		rte->insertedCols = bms_add_member(rte->insertedCols,
535 										   attnum - FirstLowInvalidHeapAttributeNumber);
536 
537 	ExecCheckRTPerms(list_make1(rte), true);
538 
539 	/*
540 	 * Make sure the constructed table does not have RLS enabled.
541 	 *
542 	 * check_enable_rls() will ereport(ERROR) itself if the user has requested
543 	 * something invalid, and otherwise will return RLS_ENABLED if RLS should
544 	 * be enabled here.  We don't actually support that currently, so throw
545 	 * our own ereport(ERROR) if that happens.
546 	 */
547 	if (check_enable_rls(intoRelationAddr.objectId, InvalidOid, false) == RLS_ENABLED)
548 		ereport(ERROR,
549 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
550 				 (errmsg("policies not yet implemented for this command"))));
551 
552 	/*
553 	 * Tentatively mark the target as populated, if it's a matview and we're
554 	 * going to fill it; otherwise, no change needed.
555 	 */
556 	if (is_matview && !into->skipData)
557 		SetMatViewPopulatedState(intoRelationDesc, true);
558 
559 	/*
560 	 * Fill private fields of myState for use by later routines
561 	 */
562 	myState->rel = intoRelationDesc;
563 	myState->reladdr = intoRelationAddr;
564 	myState->output_cid = GetCurrentCommandId(true);
565 
566 	/*
567 	 * We can skip WAL-logging the insertions, unless PITR or streaming
568 	 * replication is in use. We can skip the FSM in any case.
569 	 */
570 	myState->hi_options = HEAP_INSERT_SKIP_FSM |
571 		(XLogIsNeeded() ? 0 : HEAP_INSERT_SKIP_WAL);
572 	myState->bistate = GetBulkInsertState();
573 
574 	/* Not using WAL requires smgr_targblock be initially invalid */
575 	Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber);
576 }
577 
578 /*
579  * intorel_receive --- receive one tuple
580  */
581 static bool
582 intorel_receive(TupleTableSlot *slot, DestReceiver *self)
583 {
584 	DR_intorel *myState = (DR_intorel *) self;
585 	HeapTuple	tuple;
586 
587 	/*
588 	 * get the heap tuple out of the tuple table slot, making sure we have a
589 	 * writable copy
590 	 */
591 	tuple = ExecMaterializeSlot(slot);
592 
593 	/*
594 	 * force assignment of new OID (see comments in ExecInsert)
595 	 */
596 	if (myState->rel->rd_rel->relhasoids)
597 		HeapTupleSetOid(tuple, InvalidOid);
598 
599 	heap_insert(myState->rel,
600 				tuple,
601 				myState->output_cid,
602 				myState->hi_options,
603 				myState->bistate);
604 
605 	/* We know this is a newly created relation, so there are no indexes */
606 
607 	return true;
608 }
609 
610 /*
611  * intorel_shutdown --- executor end
612  */
613 static void
614 intorel_shutdown(DestReceiver *self)
615 {
616 	DR_intorel *myState = (DR_intorel *) self;
617 
618 	FreeBulkInsertState(myState->bistate);
619 
620 	/* If we skipped using WAL, must heap_sync before commit */
621 	if (myState->hi_options & HEAP_INSERT_SKIP_WAL)
622 		heap_sync(myState->rel);
623 
624 	/* close rel, but keep lock until commit */
625 	heap_close(myState->rel, NoLock);
626 	myState->rel = NULL;
627 }
628 
629 /*
630  * intorel_destroy --- release DestReceiver object
631  */
632 static void
633 intorel_destroy(DestReceiver *self)
634 {
635 	pfree(self);
636 }
637