1 /*------------------------------------------------------------------------- 2 * 3 * createas.c 4 * Execution of CREATE TABLE ... AS, a/k/a SELECT INTO. 5 * Since CREATE MATERIALIZED VIEW shares syntax and most behaviors, 6 * we implement that here, too. 7 * 8 * We implement this by diverting the query's normal output to a 9 * specialized DestReceiver type. 10 * 11 * Formerly, CTAS was implemented as a variant of SELECT, which led 12 * to assorted legacy behaviors that we still try to preserve, notably that 13 * we must return a tuples-processed count in the QueryCompletion. (We no 14 * longer do that for CTAS ... WITH NO DATA, however.) 15 * 16 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group 17 * Portions Copyright (c) 1994, Regents of the University of California 18 * 19 * 20 * IDENTIFICATION 21 * src/backend/commands/createas.c 22 * 23 *------------------------------------------------------------------------- 24 */ 25 #include "postgres.h" InfoStreamBuilder(msf::MSFBuilder & Msf,NamedStreamMap & NamedStreams)26 27 #include "access/heapam.h" 28 #include "access/htup_details.h" 29 #include "access/reloptions.h" 30 #include "access/sysattr.h" 31 #include "access/tableam.h" 32 #include "access/xact.h" 33 #include "access/xlog.h" 34 #include "catalog/namespace.h" 35 #include "catalog/toasting.h" 36 #include "commands/createas.h" 37 #include "commands/matview.h" 38 #include "commands/prepare.h" 39 #include "commands/tablecmds.h" 40 #include "commands/view.h" 41 #include "miscadmin.h" 42 #include "nodes/makefuncs.h" 43 #include "nodes/nodeFuncs.h" 44 #include "parser/parse_clause.h" 45 #include "rewrite/rewriteHandler.h" 46 #include "storage/smgr.h" 47 #include "tcop/tcopprot.h" 48 #include "utils/builtins.h" 49 #include "utils/lsyscache.h" 50 #include "utils/rel.h" 51 #include "utils/rls.h" 52 #include "utils/snapmgr.h" 53 54 typedef struct 55 { 56 DestReceiver pub; /* publicly-known function pointers */ 57 IntoClause *into; /* target relation specification */ 58 /* These fields are filled by intorel_startup: */ 59 Relation rel; /* relation to write to */ 60 ObjectAddress reladdr; /* address of rel, for ExecCreateTableAs */ 61 CommandId output_cid; /* cmin to insert in output tuples */ 62 int ti_options; /* table_tuple_insert performance options */ 63 BulkInsertState bistate; /* bulk insert state */ 64 } DR_intorel; 65 66 /* utility functions for CTAS definition creation */ 67 static ObjectAddress create_ctas_internal(List *attrList, IntoClause *into); 68 static ObjectAddress create_ctas_nodata(List *tlist, IntoClause *into); 69 70 /* DestReceiver routines for collecting data */ 71 static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo); 72 static bool intorel_receive(TupleTableSlot *slot, DestReceiver *self); 73 static void intorel_shutdown(DestReceiver *self); 74 static void intorel_destroy(DestReceiver *self); 75 76 77 /* 78 * create_ctas_internal 79 * 80 * Internal utility used for the creation of the definition of a relation 81 * created via CREATE TABLE AS or a materialized view. Caller needs to 82 * provide a list of attributes (ColumnDef nodes). 83 */ 84 static ObjectAddress 85 create_ctas_internal(List *attrList, IntoClause *into) 86 { 87 CreateStmt *create = makeNode(CreateStmt); 88 bool is_matview; 89 char relkind; 90 Datum toast_options; 91 static char *validnsps[] = HEAP_RELOPT_NAMESPACES; 92 ObjectAddress intoRelationAddr; 93 94 /* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */ 95 is_matview = (into->viewQuery != NULL); 96 relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION; 97 98 /* 99 * Create the target relation by faking up a CREATE TABLE parsetree and 100 * passing it to DefineRelation. 101 */ 102 create->relation = into->rel; 103 create->tableElts = attrList; 104 create->inhRelations = NIL; 105 create->ofTypename = NULL; 106 create->constraints = NIL; 107 create->options = into->options; 108 create->oncommit = into->onCommit; 109 create->tablespacename = into->tableSpaceName; 110 create->if_not_exists = false; 111 create->accessMethod = into->accessMethod; 112 113 /* 114 * Create the relation. (This will error out if there's an existing view, 115 * so we don't need more code to complain if "replace" is false.) 116 */ 117 intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL); 118 119 /* 120 * If necessary, create a TOAST table for the target table. Note that 121 * NewRelationCreateToastTable ends with CommandCounterIncrement(), so 122 * that the TOAST table will be visible for insertion. 123 */ 124 CommandCounterIncrement(); 125 126 /* parse and validate reloptions for the toast table */ 127 toast_options = transformRelOptions((Datum) 0, 128 create->options, 129 "toast", 130 validnsps, 131 true, false); 132 133 (void) heap_reloptions(RELKIND_TOASTVALUE, toast_options, true); 134 135 NewRelationCreateToastTable(intoRelationAddr.objectId, toast_options); 136 137 /* Create the "view" part of a materialized view. */ 138 if (is_matview) 139 { 140 /* StoreViewQuery scribbles on tree, so make a copy */ 141 Query *query = (Query *) copyObject(into->viewQuery); 142 143 StoreViewQuery(intoRelationAddr.objectId, query, false); 144 CommandCounterIncrement(); 145 } 146 147 return intoRelationAddr; 148 } 149 150 151 /* 152 * create_ctas_nodata 153 * 154 * Create CTAS or materialized view when WITH NO DATA is used, starting from 155 * the targetlist of the SELECT or view definition. 156 */ 157 static ObjectAddress 158 create_ctas_nodata(List *tlist, IntoClause *into) 159 { 160 List *attrList; 161 ListCell *t, 162 *lc; 163 164 /* 165 * Build list of ColumnDefs from non-junk elements of the tlist. If a 166 * column name list was specified in CREATE TABLE AS, override the column 167 * names in the query. (Too few column names are OK, too many are not.) 168 */ 169 attrList = NIL; 170 lc = list_head(into->colNames); 171 foreach(t, tlist) 172 { 173 TargetEntry *tle = (TargetEntry *) lfirst(t); 174 175 if (!tle->resjunk) 176 { 177 ColumnDef *col; 178 char *colname; 179 180 if (lc) 181 { 182 colname = strVal(lfirst(lc)); 183 lc = lnext(into->colNames, lc); 184 } 185 else 186 colname = tle->resname; 187 188 col = makeColumnDef(colname, 189 exprType((Node *) tle->expr), 190 exprTypmod((Node *) tle->expr), 191 exprCollation((Node *) tle->expr)); 192 193 /* 194 * It's possible that the column is of a collatable type but the 195 * collation could not be resolved, so double-check. (We must 196 * check this here because DefineRelation would adopt the type's 197 * default collation rather than complaining.) 198 */ 199 if (!OidIsValid(col->collOid) && 200 type_is_collatable(col->typeName->typeOid)) 201 ereport(ERROR, 202 (errcode(ERRCODE_INDETERMINATE_COLLATION), 203 errmsg("no collation was derived for column \"%s\" with collatable type %s", 204 col->colname, 205 format_type_be(col->typeName->typeOid)), 206 errhint("Use the COLLATE clause to set the collation explicitly."))); 207 208 attrList = lappend(attrList, col); 209 } 210 } 211 212 if (lc != NULL) 213 ereport(ERROR, 214 (errcode(ERRCODE_SYNTAX_ERROR), 215 errmsg("too many column names were specified"))); 216 217 /* Create the relation definition using the ColumnDef list */ 218 return create_ctas_internal(attrList, into); 219 } 220 221 222 /* 223 * ExecCreateTableAs -- execute a CREATE TABLE AS command 224 */ 225 ObjectAddress 226 ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, 227 ParamListInfo params, QueryEnvironment *queryEnv, 228 QueryCompletion *qc) 229 { 230 Query *query = castNode(Query, stmt->query); 231 IntoClause *into = stmt->into; 232 bool is_matview = (into->viewQuery != NULL); 233 DestReceiver *dest; 234 Oid save_userid = InvalidOid; 235 int save_sec_context = 0; 236 int save_nestlevel = 0; 237 ObjectAddress address; 238 List *rewritten; 239 PlannedStmt *plan; 240 QueryDesc *queryDesc; 241 242 if (stmt->if_not_exists) 243 { 244 Oid nspid; 245 246 nspid = RangeVarGetCreationNamespace(stmt->into->rel); 247 248 if (get_relname_relid(stmt->into->rel->relname, nspid)) 249 { 250 ereport(NOTICE, 251 (errcode(ERRCODE_DUPLICATE_TABLE), 252 errmsg("relation \"%s\" already exists, skipping", 253 stmt->into->rel->relname))); 254 return InvalidObjectAddress; 255 } 256 } 257 258 /* 259 * Create the tuple receiver object and insert info it will need 260 */ 261 dest = CreateIntoRelDestReceiver(into); 262 263 /* 264 * The contained Query could be a SELECT, or an EXECUTE utility command. 265 * If the latter, we just pass it off to ExecuteQuery. 266 */ 267 if (query->commandType == CMD_UTILITY && 268 IsA(query->utilityStmt, ExecuteStmt)) 269 { 270 ExecuteStmt *estmt = castNode(ExecuteStmt, query->utilityStmt); 271 272 Assert(!is_matview); /* excluded by syntax */ 273 ExecuteQuery(pstate, estmt, into, params, dest, qc); 274 275 /* get object address that intorel_startup saved for us */ 276 address = ((DR_intorel *) dest)->reladdr; 277 278 return address; 279 } 280 Assert(query->commandType == CMD_SELECT); 281 282 /* 283 * For materialized views, lock down security-restricted operations and 284 * arrange to make GUC variable changes local to this command. This is 285 * not necessary for security, but this keeps the behavior similar to 286 * REFRESH MATERIALIZED VIEW. Otherwise, one could create a materialized 287 * view not possible to refresh. 288 */ 289 if (is_matview) 290 { 291 GetUserIdAndSecContext(&save_userid, &save_sec_context); 292 SetUserIdAndSecContext(save_userid, 293 save_sec_context | SECURITY_RESTRICTED_OPERATION); 294 save_nestlevel = NewGUCNestLevel(); 295 } 296 297 if (into->skipData) 298 { 299 /* 300 * If WITH NO DATA was specified, do not go through the rewriter, 301 * planner and executor. Just define the relation using a code path 302 * similar to CREATE VIEW. This avoids dump/restore problems stemming 303 * from running the planner before all dependencies are set up. 304 */ 305 address = create_ctas_nodata(query->targetList, into); 306 } 307 else 308 { 309 /* 310 * Parse analysis was done already, but we still have to run the rule 311 * rewriter. We do not do AcquireRewriteLocks: we assume the query 312 * either came straight from the parser, or suitable locks were 313 * acquired by plancache.c. 314 * 315 * Because the rewriter and planner tend to scribble on the input, we 316 * make a preliminary copy of the source querytree. This prevents 317 * problems in the case that CTAS is in a portal or plpgsql function 318 * and is executed repeatedly. (See also the same hack in EXPLAIN and 319 * PREPARE.) 320 */ 321 rewritten = QueryRewrite(copyObject(query)); 322 323 /* SELECT should never rewrite to more or less than one SELECT query */ 324 if (list_length(rewritten) != 1) 325 elog(ERROR, "unexpected rewrite result for %s", 326 is_matview ? "CREATE MATERIALIZED VIEW" : 327 "CREATE TABLE AS SELECT"); 328 query = linitial_node(Query, rewritten); 329 Assert(query->commandType == CMD_SELECT); 330 331 /* plan the query */ 332 plan = pg_plan_query(query, pstate->p_sourcetext, 333 CURSOR_OPT_PARALLEL_OK, params); 334 335 /* 336 * Use a snapshot with an updated command ID to ensure this query sees 337 * results of any previously executed queries. (This could only 338 * matter if the planner executed an allegedly-stable function that 339 * changed the database contents, but let's do it anyway to be 340 * parallel to the EXPLAIN code path.) 341 */ 342 PushCopiedSnapshot(GetActiveSnapshot()); 343 UpdateActiveSnapshotCommandId(); 344 345 /* Create a QueryDesc, redirecting output to our tuple receiver */ 346 queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext, 347 GetActiveSnapshot(), InvalidSnapshot, 348 dest, params, queryEnv, 0); 349 350 /* call ExecutorStart to prepare the plan for execution */ 351 ExecutorStart(queryDesc, GetIntoRelEFlags(into)); 352 353 /* run the plan to completion */ 354 ExecutorRun(queryDesc, ForwardScanDirection, 0L, true); 355 356 /* save the rowcount if we're given a qc to fill */ 357 if (qc) 358 SetQueryCompletion(qc, CMDTAG_SELECT, queryDesc->estate->es_processed); 359 360 /* get object address that intorel_startup saved for us */ 361 address = ((DR_intorel *) dest)->reladdr; 362 363 /* and clean up */ 364 ExecutorFinish(queryDesc); 365 ExecutorEnd(queryDesc); 366 367 FreeQueryDesc(queryDesc); 368 369 PopActiveSnapshot(); 370 } 371 372 if (is_matview) 373 { 374 /* Roll back any GUC changes */ 375 AtEOXact_GUC(false, save_nestlevel); 376 377 /* Restore userid and security context */ 378 SetUserIdAndSecContext(save_userid, save_sec_context); 379 } 380 381 return address; 382 } 383 384 /* 385 * GetIntoRelEFlags --- compute executor flags needed for CREATE TABLE AS 386 * 387 * This is exported because EXPLAIN and PREPARE need it too. (Note: those 388 * callers still need to deal explicitly with the skipData flag; since they 389 * use different methods for suppressing execution, it doesn't seem worth 390 * trying to encapsulate that part.) 391 */ 392 int 393 GetIntoRelEFlags(IntoClause *intoClause) 394 { 395 int flags = 0; 396 397 if (intoClause->skipData) 398 flags |= EXEC_FLAG_WITH_NO_DATA; 399 400 return flags; 401 } 402 403 /* 404 * CreateIntoRelDestReceiver -- create a suitable DestReceiver object 405 * 406 * intoClause will be NULL if called from CreateDestReceiver(), in which 407 * case it has to be provided later. However, it is convenient to allow 408 * self->into to be filled in immediately for other callers. 409 */ 410 DestReceiver * 411 CreateIntoRelDestReceiver(IntoClause *intoClause) 412 { 413 DR_intorel *self = (DR_intorel *) palloc0(sizeof(DR_intorel)); 414 415 self->pub.receiveSlot = intorel_receive; 416 self->pub.rStartup = intorel_startup; 417 self->pub.rShutdown = intorel_shutdown; 418 self->pub.rDestroy = intorel_destroy; 419 self->pub.mydest = DestIntoRel; 420 self->into = intoClause; 421 /* other private fields will be set during intorel_startup */ 422 423 return (DestReceiver *) self; 424 } 425 426 /* 427 * intorel_startup --- executor startup 428 */ 429 static void 430 intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) 431 { 432 DR_intorel *myState = (DR_intorel *) self; 433 IntoClause *into = myState->into; 434 bool is_matview; 435 char relkind; 436 List *attrList; 437 ObjectAddress intoRelationAddr; 438 Relation intoRelationDesc; 439 RangeTblEntry *rte; 440 ListCell *lc; 441 int attnum; 442 443 Assert(into != NULL); /* else somebody forgot to set it */ 444 445 /* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */ 446 is_matview = (into->viewQuery != NULL); 447 relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION; 448 449 /* 450 * Build column definitions using "pre-cooked" type and collation info. If 451 * a column name list was specified in CREATE TABLE AS, override the 452 * column names derived from the query. (Too few column names are OK, too 453 * many are not.) 454 */ 455 attrList = NIL; 456 lc = list_head(into->colNames); 457 for (attnum = 0; attnum < typeinfo->natts; attnum++) 458 { 459 Form_pg_attribute attribute = TupleDescAttr(typeinfo, attnum); 460 ColumnDef *col; 461 char *colname; 462 463 if (lc) 464 { 465 colname = strVal(lfirst(lc)); 466 lc = lnext(into->colNames, lc); 467 } 468 else 469 colname = NameStr(attribute->attname); 470 471 col = makeColumnDef(colname, 472 attribute->atttypid, 473 attribute->atttypmod, 474 attribute->attcollation); 475 476 /* 477 * It's possible that the column is of a collatable type but the 478 * collation could not be resolved, so double-check. (We must check 479 * this here because DefineRelation would adopt the type's default 480 * collation rather than complaining.) 481 */ 482 if (!OidIsValid(col->collOid) && 483 type_is_collatable(col->typeName->typeOid)) 484 ereport(ERROR, 485 (errcode(ERRCODE_INDETERMINATE_COLLATION), 486 errmsg("no collation was derived for column \"%s\" with collatable type %s", 487 col->colname, 488 format_type_be(col->typeName->typeOid)), 489 errhint("Use the COLLATE clause to set the collation explicitly."))); 490 491 attrList = lappend(attrList, col); 492 } 493 494 if (lc != NULL) 495 ereport(ERROR, 496 (errcode(ERRCODE_SYNTAX_ERROR), 497 errmsg("too many column names were specified"))); 498 499 /* 500 * Actually create the target table 501 */ 502 intoRelationAddr = create_ctas_internal(attrList, into); 503 504 /* 505 * Finally we can open the target table 506 */ 507 intoRelationDesc = table_open(intoRelationAddr.objectId, AccessExclusiveLock); 508 509 /* 510 * Check INSERT permission on the constructed table. 511 * 512 * XXX: It would arguably make sense to skip this check if into->skipData 513 * is true. 514 */ 515 rte = makeNode(RangeTblEntry); 516 rte->rtekind = RTE_RELATION; 517 rte->relid = intoRelationAddr.objectId; 518 rte->relkind = relkind; 519 rte->rellockmode = RowExclusiveLock; 520 rte->requiredPerms = ACL_INSERT; 521 522 for (attnum = 1; attnum <= intoRelationDesc->rd_att->natts; attnum++) 523 rte->insertedCols = bms_add_member(rte->insertedCols, 524 attnum - FirstLowInvalidHeapAttributeNumber); 525 526 ExecCheckRTPerms(list_make1(rte), true); 527 528 /* 529 * Make sure the constructed table does not have RLS enabled. 530 * 531 * check_enable_rls() will ereport(ERROR) itself if the user has requested 532 * something invalid, and otherwise will return RLS_ENABLED if RLS should 533 * be enabled here. We don't actually support that currently, so throw 534 * our own ereport(ERROR) if that happens. 535 */ 536 if (check_enable_rls(intoRelationAddr.objectId, InvalidOid, false) == RLS_ENABLED) 537 ereport(ERROR, 538 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), 539 errmsg("policies not yet implemented for this command"))); 540 541 /* 542 * Tentatively mark the target as populated, if it's a matview and we're 543 * going to fill it; otherwise, no change needed. 544 */ 545 if (is_matview && !into->skipData) 546 SetMatViewPopulatedState(intoRelationDesc, true); 547 548 /* 549 * Fill private fields of myState for use by later routines 550 */ 551 myState->rel = intoRelationDesc; 552 myState->reladdr = intoRelationAddr; 553 myState->output_cid = GetCurrentCommandId(true); 554 myState->ti_options = TABLE_INSERT_SKIP_FSM; 555 myState->bistate = GetBulkInsertState(); 556 557 /* 558 * Valid smgr_targblock implies something already wrote to the relation. 559 * This may be harmless, but this function hasn't planned for it. 560 */ 561 Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber); 562 } 563 564 /* 565 * intorel_receive --- receive one tuple 566 */ 567 static bool 568 intorel_receive(TupleTableSlot *slot, DestReceiver *self) 569 { 570 DR_intorel *myState = (DR_intorel *) self; 571 572 /* 573 * Note that the input slot might not be of the type of the target 574 * relation. That's supported by table_tuple_insert(), but slightly less 575 * efficient than inserting with the right slot - but the alternative 576 * would be to copy into a slot of the right type, which would not be 577 * cheap either. This also doesn't allow accessing per-AM data (say a 578 * tuple's xmin), but since we don't do that here... 579 */ 580 581 table_tuple_insert(myState->rel, 582 slot, 583 myState->output_cid, 584 myState->ti_options, 585 myState->bistate); 586 587 /* We know this is a newly created relation, so there are no indexes */ 588 589 return true; 590 } 591 592 /* 593 * intorel_shutdown --- executor end 594 */ 595 static void 596 intorel_shutdown(DestReceiver *self) 597 { 598 DR_intorel *myState = (DR_intorel *) self; 599 600 FreeBulkInsertState(myState->bistate); 601 602 table_finish_bulk_insert(myState->rel, myState->ti_options); 603 604 /* close rel, but keep lock until commit */ 605 table_close(myState->rel, NoLock); 606 myState->rel = NULL; 607 } 608 609 /* 610 * intorel_destroy --- release DestReceiver object 611 */ 612 static void 613 intorel_destroy(DestReceiver *self) 614 { 615 pfree(self); 616 } 617