1 /*------------------------------------------------------------------------- 2 * 3 * createas.c 4 * Execution of CREATE TABLE ... AS, a/k/a SELECT INTO. 5 * Since CREATE MATERIALIZED VIEW shares syntax and most behaviors, 6 * we implement that here, too. 7 * 8 * We implement this by diverting the query's normal output to a 9 * specialized DestReceiver type. 10 * 11 * Formerly, CTAS was implemented as a variant of SELECT, which led 12 * to assorted legacy behaviors that we still try to preserve, notably that 13 * we must return a tuples-processed count in the completionTag. (We no 14 * longer do that for CTAS ... WITH NO DATA, however.) 15 * 16 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group 17 * Portions Copyright (c) 1994, Regents of the University of California 18 * 19 * 20 * IDENTIFICATION 21 * src/backend/commands/createas.c 22 * 23 *------------------------------------------------------------------------- 24 */ 25 #include "postgres.h" 26 27 #include "access/reloptions.h" 28 #include "access/htup_details.h" 29 #include "access/sysattr.h" 30 #include "access/xact.h" 31 #include "access/xlog.h" 32 #include "catalog/namespace.h" 33 #include "catalog/toasting.h" 34 #include "commands/createas.h" 35 #include "commands/matview.h" 36 #include "commands/prepare.h" 37 #include "commands/tablecmds.h" 38 #include "commands/view.h" 39 #include "miscadmin.h" 40 #include "nodes/makefuncs.h" 41 #include "nodes/nodeFuncs.h" 42 #include "parser/parse_clause.h" 43 #include "rewrite/rewriteHandler.h" 44 #include "storage/smgr.h" 45 #include "tcop/tcopprot.h" 46 #include "utils/builtins.h" 47 #include "utils/lsyscache.h" 48 #include "utils/rel.h" 49 #include "utils/rls.h" 50 #include "utils/snapmgr.h" 51 52 53 typedef struct 54 { 55 DestReceiver pub; /* publicly-known function pointers */ 56 IntoClause *into; /* target relation specification */ 57 /* These fields are filled by intorel_startup: */ 58 Relation rel; /* relation to write to */ 59 ObjectAddress reladdr; /* address of rel, for ExecCreateTableAs */ 60 CommandId output_cid; /* cmin to insert in output tuples */ 61 int hi_options; /* heap_insert performance options */ 62 BulkInsertState bistate; /* bulk insert state */ 63 } DR_intorel; 64 65 /* utility functions for CTAS definition creation */ 66 static ObjectAddress create_ctas_internal(List *attrList, IntoClause *into); 67 static ObjectAddress create_ctas_nodata(List *tlist, IntoClause *into); 68 69 /* DestReceiver routines for collecting data */ 70 static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo); 71 static bool intorel_receive(TupleTableSlot *slot, DestReceiver *self); 72 static void intorel_shutdown(DestReceiver *self); 73 static void intorel_destroy(DestReceiver *self); 74 75 76 /* 77 * create_ctas_internal 78 * 79 * Internal utility used for the creation of the definition of a relation 80 * created via CREATE TABLE AS or a materialized view. Caller needs to 81 * provide a list of attributes (ColumnDef nodes). 82 */ 83 static ObjectAddress 84 create_ctas_internal(List *attrList, IntoClause *into) 85 { 86 CreateStmt *create = makeNode(CreateStmt); 87 bool is_matview; 88 char relkind; 89 Datum toast_options; 90 static char *validnsps[] = HEAP_RELOPT_NAMESPACES; 91 ObjectAddress intoRelationAddr; 92 93 /* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */ 94 is_matview = (into->viewQuery != NULL); 95 relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION; 96 97 /* 98 * Create the target relation by faking up a CREATE TABLE parsetree and 99 * passing it to DefineRelation. 100 */ 101 create->relation = into->rel; 102 create->tableElts = attrList; 103 create->inhRelations = NIL; 104 create->ofTypename = NULL; 105 create->constraints = NIL; 106 create->options = into->options; 107 create->oncommit = into->onCommit; 108 create->tablespacename = into->tableSpaceName; 109 create->if_not_exists = false; 110 111 /* 112 * Create the relation. (This will error out if there's an existing view, 113 * so we don't need more code to complain if "replace" is false.) 114 */ 115 intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL); 116 117 /* 118 * If necessary, create a TOAST table for the target table. Note that 119 * NewRelationCreateToastTable ends with CommandCounterIncrement(), so 120 * that the TOAST table will be visible for insertion. 121 */ 122 CommandCounterIncrement(); 123 124 /* parse and validate reloptions for the toast table */ 125 toast_options = transformRelOptions((Datum) 0, 126 create->options, 127 "toast", 128 validnsps, 129 true, false); 130 131 (void) heap_reloptions(RELKIND_TOASTVALUE, toast_options, true); 132 133 NewRelationCreateToastTable(intoRelationAddr.objectId, toast_options); 134 135 /* Create the "view" part of a materialized view. */ 136 if (is_matview) 137 { 138 /* StoreViewQuery scribbles on tree, so make a copy */ 139 Query *query = (Query *) copyObject(into->viewQuery); 140 141 StoreViewQuery(intoRelationAddr.objectId, query, false); 142 CommandCounterIncrement(); 143 } 144 145 return intoRelationAddr; 146 } 147 148 149 /* 150 * create_ctas_nodata 151 * 152 * Create CTAS or materialized view when WITH NO DATA is used, starting from 153 * the targetlist of the SELECT or view definition. 154 */ 155 static ObjectAddress 156 create_ctas_nodata(List *tlist, IntoClause *into) 157 { 158 List *attrList; 159 ListCell *t, 160 *lc; 161 162 /* 163 * Build list of ColumnDefs from non-junk elements of the tlist. If a 164 * column name list was specified in CREATE TABLE AS, override the column 165 * names in the query. (Too few column names are OK, too many are not.) 166 */ 167 attrList = NIL; 168 lc = list_head(into->colNames); 169 foreach(t, tlist) 170 { 171 TargetEntry *tle = (TargetEntry *) lfirst(t); 172 173 if (!tle->resjunk) 174 { 175 ColumnDef *col; 176 char *colname; 177 178 if (lc) 179 { 180 colname = strVal(lfirst(lc)); 181 lc = lnext(lc); 182 } 183 else 184 colname = tle->resname; 185 186 col = makeColumnDef(colname, 187 exprType((Node *) tle->expr), 188 exprTypmod((Node *) tle->expr), 189 exprCollation((Node *) tle->expr)); 190 191 /* 192 * It's possible that the column is of a collatable type but the 193 * collation could not be resolved, so double-check. (We must 194 * check this here because DefineRelation would adopt the type's 195 * default collation rather than complaining.) 196 */ 197 if (!OidIsValid(col->collOid) && 198 type_is_collatable(col->typeName->typeOid)) 199 ereport(ERROR, 200 (errcode(ERRCODE_INDETERMINATE_COLLATION), 201 errmsg("no collation was derived for column \"%s\" with collatable type %s", 202 col->colname, 203 format_type_be(col->typeName->typeOid)), 204 errhint("Use the COLLATE clause to set the collation explicitly."))); 205 206 attrList = lappend(attrList, col); 207 } 208 } 209 210 if (lc != NULL) 211 ereport(ERROR, 212 (errcode(ERRCODE_SYNTAX_ERROR), 213 errmsg("too many column names were specified"))); 214 215 /* Create the relation definition using the ColumnDef list */ 216 return create_ctas_internal(attrList, into); 217 } 218 219 220 /* 221 * ExecCreateTableAs -- execute a CREATE TABLE AS command 222 */ 223 ObjectAddress 224 ExecCreateTableAs(CreateTableAsStmt *stmt, const char *queryString, 225 ParamListInfo params, QueryEnvironment *queryEnv, 226 char *completionTag) 227 { 228 Query *query = castNode(Query, stmt->query); 229 IntoClause *into = stmt->into; 230 bool is_matview = (into->viewQuery != NULL); 231 DestReceiver *dest; 232 Oid save_userid = InvalidOid; 233 int save_sec_context = 0; 234 int save_nestlevel = 0; 235 ObjectAddress address; 236 List *rewritten; 237 PlannedStmt *plan; 238 QueryDesc *queryDesc; 239 240 if (stmt->if_not_exists) 241 { 242 Oid nspid; 243 244 nspid = RangeVarGetCreationNamespace(stmt->into->rel); 245 246 if (get_relname_relid(stmt->into->rel->relname, nspid)) 247 { 248 ereport(NOTICE, 249 (errcode(ERRCODE_DUPLICATE_TABLE), 250 errmsg("relation \"%s\" already exists, skipping", 251 stmt->into->rel->relname))); 252 return InvalidObjectAddress; 253 } 254 } 255 256 /* 257 * Create the tuple receiver object and insert info it will need 258 */ 259 dest = CreateIntoRelDestReceiver(into); 260 261 /* 262 * The contained Query could be a SELECT, or an EXECUTE utility command. 263 * If the latter, we just pass it off to ExecuteQuery. 264 */ 265 if (query->commandType == CMD_UTILITY && 266 IsA(query->utilityStmt, ExecuteStmt)) 267 { 268 ExecuteStmt *estmt = castNode(ExecuteStmt, query->utilityStmt); 269 270 Assert(!is_matview); /* excluded by syntax */ 271 ExecuteQuery(estmt, into, queryString, params, dest, completionTag); 272 273 /* get object address that intorel_startup saved for us */ 274 address = ((DR_intorel *) dest)->reladdr; 275 276 return address; 277 } 278 Assert(query->commandType == CMD_SELECT); 279 280 /* 281 * For materialized views, lock down security-restricted operations and 282 * arrange to make GUC variable changes local to this command. This is 283 * not necessary for security, but this keeps the behavior similar to 284 * REFRESH MATERIALIZED VIEW. Otherwise, one could create a materialized 285 * view not possible to refresh. 286 */ 287 if (is_matview) 288 { 289 GetUserIdAndSecContext(&save_userid, &save_sec_context); 290 SetUserIdAndSecContext(save_userid, 291 save_sec_context | SECURITY_RESTRICTED_OPERATION); 292 save_nestlevel = NewGUCNestLevel(); 293 } 294 295 if (into->skipData) 296 { 297 /* 298 * If WITH NO DATA was specified, do not go through the rewriter, 299 * planner and executor. Just define the relation using a code path 300 * similar to CREATE VIEW. This avoids dump/restore problems stemming 301 * from running the planner before all dependencies are set up. 302 */ 303 address = create_ctas_nodata(query->targetList, into); 304 } 305 else 306 { 307 /* 308 * Parse analysis was done already, but we still have to run the rule 309 * rewriter. We do not do AcquireRewriteLocks: we assume the query 310 * either came straight from the parser, or suitable locks were 311 * acquired by plancache.c. 312 * 313 * Because the rewriter and planner tend to scribble on the input, we 314 * make a preliminary copy of the source querytree. This prevents 315 * problems in the case that CTAS is in a portal or plpgsql function 316 * and is executed repeatedly. (See also the same hack in EXPLAIN and 317 * PREPARE.) 318 */ 319 rewritten = QueryRewrite(copyObject(query)); 320 321 /* SELECT should never rewrite to more or less than one SELECT query */ 322 if (list_length(rewritten) != 1) 323 elog(ERROR, "unexpected rewrite result for %s", 324 is_matview ? "CREATE MATERIALIZED VIEW" : 325 "CREATE TABLE AS SELECT"); 326 query = linitial_node(Query, rewritten); 327 Assert(query->commandType == CMD_SELECT); 328 329 /* plan the query */ 330 plan = pg_plan_query(query, CURSOR_OPT_PARALLEL_OK, params); 331 332 /* 333 * Use a snapshot with an updated command ID to ensure this query sees 334 * results of any previously executed queries. (This could only 335 * matter if the planner executed an allegedly-stable function that 336 * changed the database contents, but let's do it anyway to be 337 * parallel to the EXPLAIN code path.) 338 */ 339 PushCopiedSnapshot(GetActiveSnapshot()); 340 UpdateActiveSnapshotCommandId(); 341 342 /* Create a QueryDesc, redirecting output to our tuple receiver */ 343 queryDesc = CreateQueryDesc(plan, queryString, 344 GetActiveSnapshot(), InvalidSnapshot, 345 dest, params, queryEnv, 0); 346 347 /* call ExecutorStart to prepare the plan for execution */ 348 ExecutorStart(queryDesc, GetIntoRelEFlags(into)); 349 350 /* run the plan to completion */ 351 ExecutorRun(queryDesc, ForwardScanDirection, 0L, true); 352 353 /* save the rowcount if we're given a completionTag to fill */ 354 if (completionTag) 355 snprintf(completionTag, COMPLETION_TAG_BUFSIZE, 356 "SELECT " UINT64_FORMAT, 357 queryDesc->estate->es_processed); 358 359 /* get object address that intorel_startup saved for us */ 360 address = ((DR_intorel *) dest)->reladdr; 361 362 /* and clean up */ 363 ExecutorFinish(queryDesc); 364 ExecutorEnd(queryDesc); 365 366 FreeQueryDesc(queryDesc); 367 368 PopActiveSnapshot(); 369 } 370 371 if (is_matview) 372 { 373 /* Roll back any GUC changes */ 374 AtEOXact_GUC(false, save_nestlevel); 375 376 /* Restore userid and security context */ 377 SetUserIdAndSecContext(save_userid, save_sec_context); 378 } 379 380 return address; 381 } 382 383 /* 384 * GetIntoRelEFlags --- compute executor flags needed for CREATE TABLE AS 385 * 386 * This is exported because EXPLAIN and PREPARE need it too. (Note: those 387 * callers still need to deal explicitly with the skipData flag; since they 388 * use different methods for suppressing execution, it doesn't seem worth 389 * trying to encapsulate that part.) 390 */ 391 int 392 GetIntoRelEFlags(IntoClause *intoClause) 393 { 394 int flags; 395 396 /* 397 * We need to tell the executor whether it has to produce OIDs or not, 398 * because it doesn't have enough information to do so itself (since we 399 * can't build the target relation until after ExecutorStart). 400 * 401 * Disallow the OIDS option for materialized views. 402 */ 403 if (interpretOidsOption(intoClause->options, 404 (intoClause->viewQuery == NULL))) 405 flags = EXEC_FLAG_WITH_OIDS; 406 else 407 flags = EXEC_FLAG_WITHOUT_OIDS; 408 409 if (intoClause->skipData) 410 flags |= EXEC_FLAG_WITH_NO_DATA; 411 412 return flags; 413 } 414 415 /* 416 * CreateIntoRelDestReceiver -- create a suitable DestReceiver object 417 * 418 * intoClause will be NULL if called from CreateDestReceiver(), in which 419 * case it has to be provided later. However, it is convenient to allow 420 * self->into to be filled in immediately for other callers. 421 */ 422 DestReceiver * 423 CreateIntoRelDestReceiver(IntoClause *intoClause) 424 { 425 DR_intorel *self = (DR_intorel *) palloc0(sizeof(DR_intorel)); 426 427 self->pub.receiveSlot = intorel_receive; 428 self->pub.rStartup = intorel_startup; 429 self->pub.rShutdown = intorel_shutdown; 430 self->pub.rDestroy = intorel_destroy; 431 self->pub.mydest = DestIntoRel; 432 self->into = intoClause; 433 /* other private fields will be set during intorel_startup */ 434 435 return (DestReceiver *) self; 436 } 437 438 /* 439 * intorel_startup --- executor startup 440 */ 441 static void 442 intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) 443 { 444 DR_intorel *myState = (DR_intorel *) self; 445 IntoClause *into = myState->into; 446 bool is_matview; 447 char relkind; 448 List *attrList; 449 ObjectAddress intoRelationAddr; 450 Relation intoRelationDesc; 451 RangeTblEntry *rte; 452 ListCell *lc; 453 int attnum; 454 455 Assert(into != NULL); /* else somebody forgot to set it */ 456 457 /* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */ 458 is_matview = (into->viewQuery != NULL); 459 relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION; 460 461 /* 462 * Build column definitions using "pre-cooked" type and collation info. If 463 * a column name list was specified in CREATE TABLE AS, override the 464 * column names derived from the query. (Too few column names are OK, too 465 * many are not.) 466 */ 467 attrList = NIL; 468 lc = list_head(into->colNames); 469 for (attnum = 0; attnum < typeinfo->natts; attnum++) 470 { 471 Form_pg_attribute attribute = TupleDescAttr(typeinfo, attnum); 472 ColumnDef *col; 473 char *colname; 474 475 if (lc) 476 { 477 colname = strVal(lfirst(lc)); 478 lc = lnext(lc); 479 } 480 else 481 colname = NameStr(attribute->attname); 482 483 col = makeColumnDef(colname, 484 attribute->atttypid, 485 attribute->atttypmod, 486 attribute->attcollation); 487 488 /* 489 * It's possible that the column is of a collatable type but the 490 * collation could not be resolved, so double-check. (We must check 491 * this here because DefineRelation would adopt the type's default 492 * collation rather than complaining.) 493 */ 494 if (!OidIsValid(col->collOid) && 495 type_is_collatable(col->typeName->typeOid)) 496 ereport(ERROR, 497 (errcode(ERRCODE_INDETERMINATE_COLLATION), 498 errmsg("no collation was derived for column \"%s\" with collatable type %s", 499 col->colname, 500 format_type_be(col->typeName->typeOid)), 501 errhint("Use the COLLATE clause to set the collation explicitly."))); 502 503 attrList = lappend(attrList, col); 504 } 505 506 if (lc != NULL) 507 ereport(ERROR, 508 (errcode(ERRCODE_SYNTAX_ERROR), 509 errmsg("too many column names were specified"))); 510 511 /* 512 * Actually create the target table 513 */ 514 intoRelationAddr = create_ctas_internal(attrList, into); 515 516 /* 517 * Finally we can open the target table 518 */ 519 intoRelationDesc = heap_open(intoRelationAddr.objectId, AccessExclusiveLock); 520 521 /* 522 * Check INSERT permission on the constructed table. 523 * 524 * XXX: It would arguably make sense to skip this check if into->skipData 525 * is true. 526 */ 527 rte = makeNode(RangeTblEntry); 528 rte->rtekind = RTE_RELATION; 529 rte->relid = intoRelationAddr.objectId; 530 rte->relkind = relkind; 531 rte->requiredPerms = ACL_INSERT; 532 533 for (attnum = 1; attnum <= intoRelationDesc->rd_att->natts; attnum++) 534 rte->insertedCols = bms_add_member(rte->insertedCols, 535 attnum - FirstLowInvalidHeapAttributeNumber); 536 537 ExecCheckRTPerms(list_make1(rte), true); 538 539 /* 540 * Make sure the constructed table does not have RLS enabled. 541 * 542 * check_enable_rls() will ereport(ERROR) itself if the user has requested 543 * something invalid, and otherwise will return RLS_ENABLED if RLS should 544 * be enabled here. We don't actually support that currently, so throw 545 * our own ereport(ERROR) if that happens. 546 */ 547 if (check_enable_rls(intoRelationAddr.objectId, InvalidOid, false) == RLS_ENABLED) 548 ereport(ERROR, 549 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), 550 (errmsg("policies not yet implemented for this command")))); 551 552 /* 553 * Tentatively mark the target as populated, if it's a matview and we're 554 * going to fill it; otherwise, no change needed. 555 */ 556 if (is_matview && !into->skipData) 557 SetMatViewPopulatedState(intoRelationDesc, true); 558 559 /* 560 * Fill private fields of myState for use by later routines 561 */ 562 myState->rel = intoRelationDesc; 563 myState->reladdr = intoRelationAddr; 564 myState->output_cid = GetCurrentCommandId(true); 565 566 /* 567 * We can skip WAL-logging the insertions, unless PITR or streaming 568 * replication is in use. We can skip the FSM in any case. 569 */ 570 myState->hi_options = HEAP_INSERT_SKIP_FSM | 571 (XLogIsNeeded() ? 0 : HEAP_INSERT_SKIP_WAL); 572 myState->bistate = GetBulkInsertState(); 573 574 /* Not using WAL requires smgr_targblock be initially invalid */ 575 Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber); 576 } 577 578 /* 579 * intorel_receive --- receive one tuple 580 */ 581 static bool 582 intorel_receive(TupleTableSlot *slot, DestReceiver *self) 583 { 584 DR_intorel *myState = (DR_intorel *) self; 585 HeapTuple tuple; 586 587 /* 588 * get the heap tuple out of the tuple table slot, making sure we have a 589 * writable copy 590 */ 591 tuple = ExecMaterializeSlot(slot); 592 593 /* 594 * force assignment of new OID (see comments in ExecInsert) 595 */ 596 if (myState->rel->rd_rel->relhasoids) 597 HeapTupleSetOid(tuple, InvalidOid); 598 599 heap_insert(myState->rel, 600 tuple, 601 myState->output_cid, 602 myState->hi_options, 603 myState->bistate); 604 605 /* We know this is a newly created relation, so there are no indexes */ 606 607 return true; 608 } 609 610 /* 611 * intorel_shutdown --- executor end 612 */ 613 static void 614 intorel_shutdown(DestReceiver *self) 615 { 616 DR_intorel *myState = (DR_intorel *) self; 617 618 FreeBulkInsertState(myState->bistate); 619 620 /* If we skipped using WAL, must heap_sync before commit */ 621 if (myState->hi_options & HEAP_INSERT_SKIP_WAL) 622 heap_sync(myState->rel); 623 624 /* close rel, but keep lock until commit */ 625 heap_close(myState->rel, NoLock); 626 myState->rel = NULL; 627 } 628 629 /* 630 * intorel_destroy --- release DestReceiver object 631 */ 632 static void 633 intorel_destroy(DestReceiver *self) 634 { 635 pfree(self); 636 } 637