1 /*-------------------------------------------------------------------------
2  *
3  * toasting.c
4  *	  This file contains routines to support creation of toast tables
5  *
6  *
7  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  *	  src/backend/catalog/toasting.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
#include "access/heapam.h"
18 #include "access/xact.h"
19 #include "catalog/binary_upgrade.h"
20 #include "catalog/catalog.h"
21 #include "catalog/dependency.h"
22 #include "catalog/heap.h"
23 #include "catalog/index.h"
24 #include "catalog/namespace.h"
25 #include "catalog/pg_am.h"
26 #include "catalog/pg_namespace.h"
27 #include "catalog/pg_opclass.h"
28 #include "catalog/pg_type.h"
29 #include "catalog/toasting.h"
30 #include "miscadmin.h"
31 #include "nodes/makefuncs.h"
32 #include "storage/lock.h"
33 #include "utils/builtins.h"
34 #include "utils/rel.h"
35 #include "utils/syscache.h"
36 
37 /* Potentially set by pg_upgrade_support functions */
38 Oid			binary_upgrade_next_toast_pg_type_oid = InvalidOid;
39 
40 static void CheckAndCreateToastTable(Oid relOid, Datum reloptions,
41 									 LOCKMODE lockmode, bool check,
42 									 Oid OIDOldToast);
43 static bool create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
44 							   Datum reloptions, LOCKMODE lockmode, bool check,
45 							   Oid OIDOldToast);
46 static bool needs_toast_table(Relation rel);
47 
48 
49 /*
50  * CreateToastTable variants
51  *		If the table needs a toast table, and doesn't already have one,
52  *		then create a toast table for it.
53  *
54  * reloptions for the toast table can be passed, too.  Pass (Datum) 0
55  * for default reloptions.
56  *
57  * We expect the caller to have verified that the relation is a table and have
58  * already done any necessary permission checks.  Callers expect this function
59  * to end with CommandCounterIncrement if it makes any changes.
60  */
61 void
62 AlterTableCreateToastTable(Oid relOid, Datum reloptions, LOCKMODE lockmode)
63 {
64 	CheckAndCreateToastTable(relOid, reloptions, lockmode, true, InvalidOid);
65 }
66 
67 void
68 NewHeapCreateToastTable(Oid relOid, Datum reloptions, LOCKMODE lockmode,
69 						Oid OIDOldToast)
70 {
71 	CheckAndCreateToastTable(relOid, reloptions, lockmode, false, OIDOldToast);
72 }
73 
74 void
75 NewRelationCreateToastTable(Oid relOid, Datum reloptions)
76 {
77 	CheckAndCreateToastTable(relOid, reloptions, AccessExclusiveLock, false,
78 							 InvalidOid);
79 }
80 
81 static void
82 CheckAndCreateToastTable(Oid relOid, Datum reloptions, LOCKMODE lockmode,
83 						 bool check, Oid OIDOldToast)
84 {
85 	Relation	rel;
86 
87 	rel = table_open(relOid, lockmode);
88 
89 	/* create_toast_table does all the work */
90 	(void) create_toast_table(rel, InvalidOid, InvalidOid, reloptions, lockmode,
91 							  check, OIDOldToast);
92 
93 	table_close(rel, NoLock);
94 }
95 
96 /*
97  * Create a toast table during bootstrap
98  *
99  * Here we need to prespecify the OIDs of the toast table and its index
100  */
101 void
102 BootstrapToastTable(char *relName, Oid toastOid, Oid toastIndexOid)
103 {
104 	Relation	rel;
105 
106 	rel = table_openrv(makeRangeVar(NULL, relName, -1), AccessExclusiveLock);
107 
108 	if (rel->rd_rel->relkind != RELKIND_RELATION &&
109 		rel->rd_rel->relkind != RELKIND_MATVIEW)
110 		ereport(ERROR,
111 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
112 				 errmsg("\"%s\" is not a table or materialized view",
113 						relName)));
114 
115 	/* create_toast_table does all the work */
116 	if (!create_toast_table(rel, toastOid, toastIndexOid, (Datum) 0,
117 							AccessExclusiveLock, false, InvalidOid))
118 		elog(ERROR, "\"%s\" does not require a toast table",
119 			 relName);
120 
121 	table_close(rel, NoLock);
122 }
123 
124 
125 /*
126  * create_toast_table --- internal workhorse
127  *
128  * rel is already opened and locked
129  * toastOid and toastIndexOid are normally InvalidOid, but during
130  * bootstrap they can be nonzero to specify hand-assigned OIDs
131  */
132 static bool
133 create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
134 				   Datum reloptions, LOCKMODE lockmode, bool check,
135 				   Oid OIDOldToast)
136 {
137 	Oid			relOid = RelationGetRelid(rel);
138 	HeapTuple	reltup;
139 	TupleDesc	tupdesc;
140 	bool		shared_relation;
141 	bool		mapped_relation;
142 	Relation	toast_rel;
143 	Relation	class_rel;
144 	Oid			toast_relid;
145 	Oid			toast_typid = InvalidOid;
146 	Oid			namespaceid;
147 	char		toast_relname[NAMEDATALEN];
148 	char		toast_idxname[NAMEDATALEN];
149 	IndexInfo  *indexInfo;
150 	Oid			collationObjectId[2];
151 	Oid			classObjectId[2];
152 	int16		coloptions[2];
153 	ObjectAddress baseobject,
154 				toastobject;
155 
156 	/*
157 	 * Is it already toasted?
158 	 */
159 	if (rel->rd_rel->reltoastrelid != InvalidOid)
160 		return false;
161 
162 	/*
163 	 * Check to see whether the table actually needs a TOAST table.
164 	 */
165 	if (!IsBinaryUpgrade)
166 	{
167 		/* Normal mode, normal check */
168 		if (!needs_toast_table(rel))
169 			return false;
170 	}
171 	else
172 	{
173 		/*
174 		 * In binary-upgrade mode, create a TOAST table if and only if
175 		 * pg_upgrade told us to (ie, a TOAST table OID has been provided).
176 		 *
177 		 * This indicates that the old cluster had a TOAST table for the
178 		 * current table.  We must create a TOAST table to receive the old
179 		 * TOAST file, even if the table seems not to need one.
180 		 *
181 		 * Contrariwise, if the old cluster did not have a TOAST table, we
182 		 * should be able to get along without one even if the new version's
183 		 * needs_toast_table rules suggest we should have one.  There is a lot
184 		 * of daylight between where we will create a TOAST table and where
185 		 * one is really necessary to avoid failures, so small cross-version
186 		 * differences in the when-to-create heuristic shouldn't be a problem.
187 		 * If we tried to create a TOAST table anyway, we would have the
188 		 * problem that it might take up an OID that will conflict with some
189 		 * old-cluster table we haven't seen yet.
190 		 */
191 		if (!OidIsValid(binary_upgrade_next_toast_pg_class_oid) ||
192 			!OidIsValid(binary_upgrade_next_toast_pg_type_oid))
193 			return false;
194 	}
195 
196 	/*
197 	 * If requested check lockmode is sufficient. This is a cross check in
198 	 * case of errors or conflicting decisions in earlier code.
199 	 */
200 	if (check && lockmode != AccessExclusiveLock)
201 		elog(ERROR, "AccessExclusiveLock required to add toast table.");
202 
203 	/*
204 	 * Create the toast table and its index
205 	 */
206 	snprintf(toast_relname, sizeof(toast_relname),
207 			 "pg_toast_%u", relOid);
208 	snprintf(toast_idxname, sizeof(toast_idxname),
209 			 "pg_toast_%u_index", relOid);
210 
211 	/* this is pretty painful...  need a tuple descriptor */
212 	tupdesc = CreateTemplateTupleDesc(3);
213 	TupleDescInitEntry(tupdesc, (AttrNumber) 1,
214 					   "chunk_id",
215 					   OIDOID,
216 					   -1, 0);
217 	TupleDescInitEntry(tupdesc, (AttrNumber) 2,
218 					   "chunk_seq",
219 					   INT4OID,
220 					   -1, 0);
221 	TupleDescInitEntry(tupdesc, (AttrNumber) 3,
222 					   "chunk_data",
223 					   BYTEAOID,
224 					   -1, 0);
225 
226 	/*
227 	 * Ensure that the toast table doesn't itself get toasted, or we'll be
228 	 * toast :-(.  This is essential for chunk_data because type bytea is
229 	 * toastable; hit the other two just to be sure.
230 	 */
231 	TupleDescAttr(tupdesc, 0)->attstorage = 'p';
232 	TupleDescAttr(tupdesc, 1)->attstorage = 'p';
233 	TupleDescAttr(tupdesc, 2)->attstorage = 'p';
234 
235 	/*
236 	 * Toast tables for regular relations go in pg_toast; those for temp
237 	 * relations go into the per-backend temp-toast-table namespace.
238 	 */
239 	if (isTempOrTempToastNamespace(rel->rd_rel->relnamespace))
240 		namespaceid = GetTempToastNamespace();
241 	else
242 		namespaceid = PG_TOAST_NAMESPACE;
243 
244 	/*
245 	 * Use binary-upgrade override for pg_type.oid, if supplied.  We might be
246 	 * in the post-schema-restore phase where we are doing ALTER TABLE to
247 	 * create TOAST tables that didn't exist in the old cluster.
248 	 */
249 	if (IsBinaryUpgrade && OidIsValid(binary_upgrade_next_toast_pg_type_oid))
250 	{
251 		toast_typid = binary_upgrade_next_toast_pg_type_oid;
252 		binary_upgrade_next_toast_pg_type_oid = InvalidOid;
253 	}
254 
255 	/* Toast table is shared if and only if its parent is. */
256 	shared_relation = rel->rd_rel->relisshared;
257 
258 	/* It's mapped if and only if its parent is, too */
259 	mapped_relation = RelationIsMapped(rel);
260 
261 	toast_relid = heap_create_with_catalog(toast_relname,
262 										   namespaceid,
263 										   rel->rd_rel->reltablespace,
264 										   toastOid,
265 										   toast_typid,
266 										   InvalidOid,
267 										   rel->rd_rel->relowner,
268 										   rel->rd_rel->relam,
269 										   tupdesc,
270 										   NIL,
271 										   RELKIND_TOASTVALUE,
272 										   rel->rd_rel->relpersistence,
273 										   shared_relation,
274 										   mapped_relation,
275 										   ONCOMMIT_NOOP,
276 										   reloptions,
277 										   false,
278 										   true,
279 										   true,
280 										   OIDOldToast,
281 										   NULL);
282 	Assert(toast_relid != InvalidOid);
283 
284 	/* make the toast relation visible, else table_open will fail */
285 	CommandCounterIncrement();
286 
287 	/* ShareLock is not really needed here, but take it anyway */
288 	toast_rel = table_open(toast_relid, ShareLock);
289 
290 	/*
291 	 * Create unique index on chunk_id, chunk_seq.
292 	 *
293 	 * NOTE: the normal TOAST access routines could actually function with a
294 	 * single-column index on chunk_id only. However, the slice access
295 	 * routines use both columns for faster access to an individual chunk. In
296 	 * addition, we want it to be unique as a check against the possibility of
297 	 * duplicate TOAST chunk OIDs. The index might also be a little more
298 	 * efficient this way, since btree isn't all that happy with large numbers
299 	 * of equal keys.
300 	 */
301 
302 	indexInfo = makeNode(IndexInfo);
303 	indexInfo->ii_NumIndexAttrs = 2;
304 	indexInfo->ii_NumIndexKeyAttrs = 2;
305 	indexInfo->ii_IndexAttrNumbers[0] = 1;
306 	indexInfo->ii_IndexAttrNumbers[1] = 2;
307 	indexInfo->ii_Expressions = NIL;
308 	indexInfo->ii_ExpressionsState = NIL;
309 	indexInfo->ii_Predicate = NIL;
310 	indexInfo->ii_PredicateState = NULL;
311 	indexInfo->ii_ExclusionOps = NULL;
312 	indexInfo->ii_ExclusionProcs = NULL;
313 	indexInfo->ii_ExclusionStrats = NULL;
314 	indexInfo->ii_Unique = true;
315 	indexInfo->ii_ReadyForInserts = true;
316 	indexInfo->ii_Concurrent = false;
317 	indexInfo->ii_BrokenHotChain = false;
318 	indexInfo->ii_ParallelWorkers = 0;
319 	indexInfo->ii_Am = BTREE_AM_OID;
320 	indexInfo->ii_AmCache = NULL;
321 	indexInfo->ii_Context = CurrentMemoryContext;
322 
323 	collationObjectId[0] = InvalidOid;
324 	collationObjectId[1] = InvalidOid;
325 
326 	classObjectId[0] = OID_BTREE_OPS_OID;
327 	classObjectId[1] = INT4_BTREE_OPS_OID;
328 
329 	coloptions[0] = 0;
330 	coloptions[1] = 0;
331 
332 	index_create(toast_rel, toast_idxname, toastIndexOid, InvalidOid,
333 				 InvalidOid, InvalidOid,
334 				 indexInfo,
335 				 list_make2("chunk_id", "chunk_seq"),
336 				 BTREE_AM_OID,
337 				 rel->rd_rel->reltablespace,
338 				 collationObjectId, classObjectId, coloptions, (Datum) 0,
339 				 INDEX_CREATE_IS_PRIMARY, 0, true, true, NULL);
340 
341 	table_close(toast_rel, NoLock);
342 
343 	/*
344 	 * Store the toast table's OID in the parent relation's pg_class row
345 	 */
346 	class_rel = table_open(RelationRelationId, RowExclusiveLock);
347 
348 	reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relOid));
349 	if (!HeapTupleIsValid(reltup))
350 		elog(ERROR, "cache lookup failed for relation %u", relOid);
351 
352 	((Form_pg_class) GETSTRUCT(reltup))->reltoastrelid = toast_relid;
353 
354 	if (!IsBootstrapProcessingMode())
355 	{
356 		/* normal case, use a transactional update */
357 		CatalogTupleUpdate(class_rel, &reltup->t_self, reltup);
358 	}
359 	else
360 	{
361 		/* While bootstrapping, we cannot UPDATE, so overwrite in-place */
362 		heap_inplace_update(class_rel, reltup);
363 	}
364 
365 	heap_freetuple(reltup);
366 
367 	table_close(class_rel, RowExclusiveLock);
368 
369 	/*
370 	 * Register dependency from the toast table to the master, so that the
371 	 * toast table will be deleted if the master is.  Skip this in bootstrap
372 	 * mode.
373 	 */
374 	if (!IsBootstrapProcessingMode())
375 	{
376 		baseobject.classId = RelationRelationId;
377 		baseobject.objectId = relOid;
378 		baseobject.objectSubId = 0;
379 		toastobject.classId = RelationRelationId;
380 		toastobject.objectId = toast_relid;
381 		toastobject.objectSubId = 0;
382 
383 		recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
384 	}
385 
386 	/*
387 	 * Make changes visible
388 	 */
389 	CommandCounterIncrement();
390 
391 	return true;
392 }
393 
394 /*
395  * Check to see whether the table needs a TOAST table.
396  */
397 static bool
398 needs_toast_table(Relation rel)
399 {
400 	/*
401 	 * No need to create a TOAST table for partitioned tables.
402 	 */
403 	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
404 		return false;
405 
406 	/*
407 	 * We cannot allow toasting a shared relation after initdb (because
408 	 * there's no way to mark it toasted in other databases' pg_class).
409 	 */
410 	if (rel->rd_rel->relisshared && !IsBootstrapProcessingMode())
411 		return false;
412 
413 	/*
414 	 * Ignore attempts to create toast tables on catalog tables after initdb.
415 	 * Which catalogs get toast tables is explicitly chosen in
416 	 * catalog/toasting.h.  (We could get here via some ALTER TABLE command if
417 	 * the catalog doesn't have a toast table.)
418 	 */
419 	if (IsCatalogRelation(rel) && !IsBootstrapProcessingMode())
420 		return false;
421 
422 	/* Otherwise, let the AM decide. */
423 	return table_relation_needs_toast_table(rel);
424 }
425