1 /*-------------------------------------------------------------------------
2  *
3  * pgstatapprox.c
4  *		  Bloat estimation functions
5  *
6  * Copyright (c) 2014-2021, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *		  contrib/pgstattuple/pgstatapprox.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "access/heapam.h"
16 #include "access/htup_details.h"
17 #include "access/multixact.h"
18 #include "access/relation.h"
19 #include "access/transam.h"
20 #include "access/visibilitymap.h"
21 #include "access/xact.h"
22 #include "catalog/namespace.h"
23 #include "catalog/pg_am_d.h"
24 #include "commands/vacuum.h"
25 #include "funcapi.h"
26 #include "miscadmin.h"
27 #include "storage/bufmgr.h"
28 #include "storage/freespace.h"
29 #include "storage/lmgr.h"
30 #include "storage/procarray.h"
31 #include "utils/builtins.h"
32 
33 PG_FUNCTION_INFO_V1(pgstattuple_approx);
34 PG_FUNCTION_INFO_V1(pgstattuple_approx_v1_5);
35 
36 Datum		pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo);
37 
38 typedef struct output_type
39 {
40 	uint64		table_len;
41 	uint64		scanned_percent;
42 	uint64		tuple_count;
43 	uint64		tuple_len;
44 	double		tuple_percent;
45 	uint64		dead_tuple_count;
46 	uint64		dead_tuple_len;
47 	double		dead_tuple_percent;
48 	uint64		free_space;
49 	double		free_percent;
50 } output_type;
51 
52 #define NUM_OUTPUT_COLUMNS 10
53 
54 /*
55  * This function takes an already open relation and scans its pages,
56  * skipping those that have the corresponding visibility map bit set.
57  * For pages we skip, we find the free space from the free space map
58  * and approximate tuple_len on that basis. For the others, we count
59  * the exact number of dead tuples etc.
60  *
61  * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but
62  * we do not try to avoid skipping single pages.
63  */
64 static void
statapprox_heap(Relation rel,output_type * stat)65 statapprox_heap(Relation rel, output_type *stat)
66 {
67 	BlockNumber scanned,
68 				nblocks,
69 				blkno;
70 	Buffer		vmbuffer = InvalidBuffer;
71 	BufferAccessStrategy bstrategy;
72 	TransactionId OldestXmin;
73 
74 	OldestXmin = GetOldestNonRemovableTransactionId(rel);
75 	bstrategy = GetAccessStrategy(BAS_BULKREAD);
76 
77 	nblocks = RelationGetNumberOfBlocks(rel);
78 	scanned = 0;
79 
80 	for (blkno = 0; blkno < nblocks; blkno++)
81 	{
82 		Buffer		buf;
83 		Page		page;
84 		OffsetNumber offnum,
85 					maxoff;
86 		Size		freespace;
87 
88 		CHECK_FOR_INTERRUPTS();
89 
90 		/*
91 		 * If the page has only visible tuples, then we can find out the free
92 		 * space from the FSM and move on.
93 		 */
94 		if (VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
95 		{
96 			freespace = GetRecordedFreeSpace(rel, blkno);
97 			stat->tuple_len += BLCKSZ - freespace;
98 			stat->free_space += freespace;
99 			continue;
100 		}
101 
102 		buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
103 								 RBM_NORMAL, bstrategy);
104 
105 		LockBuffer(buf, BUFFER_LOCK_SHARE);
106 
107 		page = BufferGetPage(buf);
108 
109 		/*
110 		 * It's not safe to call PageGetHeapFreeSpace() on new pages, so we
111 		 * treat them as being free space for our purposes.
112 		 */
113 		if (!PageIsNew(page))
114 			stat->free_space += PageGetHeapFreeSpace(page);
115 		else
116 			stat->free_space += BLCKSZ - SizeOfPageHeaderData;
117 
118 		/* We may count the page as scanned even if it's new/empty */
119 		scanned++;
120 
121 		if (PageIsNew(page) || PageIsEmpty(page))
122 		{
123 			UnlockReleaseBuffer(buf);
124 			continue;
125 		}
126 
127 		/*
128 		 * Look at each tuple on the page and decide whether it's live or
129 		 * dead, then count it and its size. Unlike lazy_scan_heap, we can
130 		 * afford to ignore problems and special cases.
131 		 */
132 		maxoff = PageGetMaxOffsetNumber(page);
133 
134 		for (offnum = FirstOffsetNumber;
135 			 offnum <= maxoff;
136 			 offnum = OffsetNumberNext(offnum))
137 		{
138 			ItemId		itemid;
139 			HeapTupleData tuple;
140 
141 			itemid = PageGetItemId(page, offnum);
142 
143 			if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid) ||
144 				ItemIdIsDead(itemid))
145 			{
146 				continue;
147 			}
148 
149 			Assert(ItemIdIsNormal(itemid));
150 
151 			ItemPointerSet(&(tuple.t_self), blkno, offnum);
152 
153 			tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
154 			tuple.t_len = ItemIdGetLength(itemid);
155 			tuple.t_tableOid = RelationGetRelid(rel);
156 
157 			/*
158 			 * We follow VACUUM's lead in counting INSERT_IN_PROGRESS tuples
159 			 * as "dead" while DELETE_IN_PROGRESS tuples are "live".  We don't
160 			 * bother distinguishing tuples inserted/deleted by our own
161 			 * transaction.
162 			 */
163 			switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
164 			{
165 				case HEAPTUPLE_LIVE:
166 				case HEAPTUPLE_DELETE_IN_PROGRESS:
167 					stat->tuple_len += tuple.t_len;
168 					stat->tuple_count++;
169 					break;
170 				case HEAPTUPLE_DEAD:
171 				case HEAPTUPLE_RECENTLY_DEAD:
172 				case HEAPTUPLE_INSERT_IN_PROGRESS:
173 					stat->dead_tuple_len += tuple.t_len;
174 					stat->dead_tuple_count++;
175 					break;
176 				default:
177 					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
178 					break;
179 			}
180 		}
181 
182 		UnlockReleaseBuffer(buf);
183 	}
184 
185 	stat->table_len = (uint64) nblocks * BLCKSZ;
186 
187 	/*
188 	 * We don't know how many tuples are in the pages we didn't scan, so
189 	 * extrapolate the live-tuple count to the whole table in the same way
190 	 * that VACUUM does.  (Like VACUUM, we're not taking a random sample, so
191 	 * just extrapolating linearly seems unsafe.)  There should be no dead
192 	 * tuples in all-visible pages, so no correction is needed for that, and
193 	 * we already accounted for the space in those pages, too.
194 	 */
195 	stat->tuple_count = vac_estimate_reltuples(rel, nblocks, scanned,
196 											   stat->tuple_count);
197 
198 	/* It's not clear if we could get -1 here, but be safe. */
199 	stat->tuple_count = Max(stat->tuple_count, 0);
200 
201 	/*
202 	 * Calculate percentages if the relation has one or more pages.
203 	 */
204 	if (nblocks != 0)
205 	{
206 		stat->scanned_percent = 100 * scanned / nblocks;
207 		stat->tuple_percent = 100.0 * stat->tuple_len / stat->table_len;
208 		stat->dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len;
209 		stat->free_percent = 100.0 * stat->free_space / stat->table_len;
210 	}
211 
212 	if (BufferIsValid(vmbuffer))
213 	{
214 		ReleaseBuffer(vmbuffer);
215 		vmbuffer = InvalidBuffer;
216 	}
217 }
218 
219 /*
220  * Returns estimated live/dead tuple statistics for the given relid.
221  *
222  * The superuser() check here must be kept as the library might be upgraded
223  * without the extension being upgraded, meaning that in pre-1.5 installations
224  * these functions could be called by any user.
225  */
226 Datum
pgstattuple_approx(PG_FUNCTION_ARGS)227 pgstattuple_approx(PG_FUNCTION_ARGS)
228 {
229 	Oid			relid = PG_GETARG_OID(0);
230 
231 	if (!superuser())
232 		ereport(ERROR,
233 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
234 				 errmsg("must be superuser to use pgstattuple functions")));
235 
236 	PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo));
237 }
238 
239 /*
240  * As of pgstattuple version 1.5, we no longer need to check if the user
241  * is a superuser because we REVOKE EXECUTE on the SQL function from PUBLIC.
242  * Users can then grant access to it based on their policies.
243  *
244  * Otherwise identical to pgstattuple_approx (above).
245  */
246 Datum
pgstattuple_approx_v1_5(PG_FUNCTION_ARGS)247 pgstattuple_approx_v1_5(PG_FUNCTION_ARGS)
248 {
249 	Oid			relid = PG_GETARG_OID(0);
250 
251 	PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo));
252 }
253 
254 Datum
pgstattuple_approx_internal(Oid relid,FunctionCallInfo fcinfo)255 pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo)
256 {
257 	Relation	rel;
258 	output_type stat = {0};
259 	TupleDesc	tupdesc;
260 	bool		nulls[NUM_OUTPUT_COLUMNS];
261 	Datum		values[NUM_OUTPUT_COLUMNS];
262 	HeapTuple	ret;
263 	int			i = 0;
264 
265 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
266 		elog(ERROR, "return type must be a row type");
267 
268 	if (tupdesc->natts != NUM_OUTPUT_COLUMNS)
269 		elog(ERROR, "incorrect number of output arguments");
270 
271 	rel = relation_open(relid, AccessShareLock);
272 
273 	/*
274 	 * Reject attempts to read non-local temporary relations; we would be
275 	 * likely to get wrong data since we have no visibility into the owning
276 	 * session's local buffers.
277 	 */
278 	if (RELATION_IS_OTHER_TEMP(rel))
279 		ereport(ERROR,
280 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
281 				 errmsg("cannot access temporary tables of other sessions")));
282 
283 	/*
284 	 * We support only relation kinds with a visibility map and a free space
285 	 * map.
286 	 */
287 	if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
288 		  rel->rd_rel->relkind == RELKIND_MATVIEW ||
289 		  rel->rd_rel->relkind == RELKIND_TOASTVALUE))
290 		ereport(ERROR,
291 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
292 				 errmsg("\"%s\" is not a table, materialized view, or TOAST table",
293 						RelationGetRelationName(rel))));
294 
295 	if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
296 		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
297 						errmsg("only heap AM is supported")));
298 
299 	statapprox_heap(rel, &stat);
300 
301 	relation_close(rel, AccessShareLock);
302 
303 	memset(nulls, 0, sizeof(nulls));
304 
305 	values[i++] = Int64GetDatum(stat.table_len);
306 	values[i++] = Float8GetDatum(stat.scanned_percent);
307 	values[i++] = Int64GetDatum(stat.tuple_count);
308 	values[i++] = Int64GetDatum(stat.tuple_len);
309 	values[i++] = Float8GetDatum(stat.tuple_percent);
310 	values[i++] = Int64GetDatum(stat.dead_tuple_count);
311 	values[i++] = Int64GetDatum(stat.dead_tuple_len);
312 	values[i++] = Float8GetDatum(stat.dead_tuple_percent);
313 	values[i++] = Int64GetDatum(stat.free_space);
314 	values[i++] = Float8GetDatum(stat.free_percent);
315 
316 	ret = heap_form_tuple(tupdesc, values, nulls);
317 	return HeapTupleGetDatum(ret);
318 }
319