1 /*-------------------------------------------------------------------------
2  *
3  * pgstatapprox.c
4  *		  Bloat estimation functions
5  *
6  * Copyright (c) 2014-2017, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *		  contrib/pgstattuple/pgstatapprox.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "access/visibilitymap.h"
16 #include "access/transam.h"
17 #include "access/xact.h"
18 #include "access/multixact.h"
19 #include "access/htup_details.h"
20 #include "catalog/namespace.h"
21 #include "funcapi.h"
22 #include "miscadmin.h"
23 #include "storage/bufmgr.h"
24 #include "storage/freespace.h"
25 #include "storage/procarray.h"
26 #include "storage/lmgr.h"
27 #include "utils/builtins.h"
28 #include "utils/tqual.h"
29 #include "commands/vacuum.h"
30 
31 PG_FUNCTION_INFO_V1(pgstattuple_approx);
32 PG_FUNCTION_INFO_V1(pgstattuple_approx_v1_5);
33 
34 Datum		pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo);
35 
36 typedef struct output_type
37 {
38 	uint64		table_len;
39 	uint64		scanned_percent;
40 	uint64		tuple_count;
41 	uint64		tuple_len;
42 	double		tuple_percent;
43 	uint64		dead_tuple_count;
44 	uint64		dead_tuple_len;
45 	double		dead_tuple_percent;
46 	uint64		free_space;
47 	double		free_percent;
48 } output_type;
49 
50 #define NUM_OUTPUT_COLUMNS 10
51 
52 /*
53  * This function takes an already open relation and scans its pages,
54  * skipping those that have the corresponding visibility map bit set.
55  * For pages we skip, we find the free space from the free space map
56  * and approximate tuple_len on that basis. For the others, we count
57  * the exact number of dead tuples etc.
58  *
59  * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but
60  * we do not try to avoid skipping single pages.
61  */
62 static void
statapprox_heap(Relation rel,output_type * stat)63 statapprox_heap(Relation rel, output_type *stat)
64 {
65 	BlockNumber scanned,
66 				nblocks,
67 				blkno;
68 	Buffer		vmbuffer = InvalidBuffer;
69 	BufferAccessStrategy bstrategy;
70 	TransactionId OldestXmin;
71 	uint64		misc_count = 0;
72 
73 	OldestXmin = GetOldestXmin(rel, PROCARRAY_FLAGS_VACUUM);
74 	bstrategy = GetAccessStrategy(BAS_BULKREAD);
75 
76 	nblocks = RelationGetNumberOfBlocks(rel);
77 	scanned = 0;
78 
79 	for (blkno = 0; blkno < nblocks; blkno++)
80 	{
81 		Buffer		buf;
82 		Page		page;
83 		OffsetNumber offnum,
84 					maxoff;
85 		Size		freespace;
86 
87 		CHECK_FOR_INTERRUPTS();
88 
89 		/*
90 		 * If the page has only visible tuples, then we can find out the free
91 		 * space from the FSM and move on.
92 		 */
93 		if (VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
94 		{
95 			freespace = GetRecordedFreeSpace(rel, blkno);
96 			stat->tuple_len += BLCKSZ - freespace;
97 			stat->free_space += freespace;
98 			continue;
99 		}
100 
101 		buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
102 								 RBM_NORMAL, bstrategy);
103 
104 		LockBuffer(buf, BUFFER_LOCK_SHARE);
105 
106 		page = BufferGetPage(buf);
107 
108 		/*
109 		 * It's not safe to call PageGetHeapFreeSpace() on new pages, so we
110 		 * treat them as being free space for our purposes.
111 		 */
112 		if (!PageIsNew(page))
113 			stat->free_space += PageGetHeapFreeSpace(page);
114 		else
115 			stat->free_space += BLCKSZ - SizeOfPageHeaderData;
116 
117 		if (PageIsNew(page) || PageIsEmpty(page))
118 		{
119 			UnlockReleaseBuffer(buf);
120 			continue;
121 		}
122 
123 		scanned++;
124 
125 		/*
126 		 * Look at each tuple on the page and decide whether it's live or
127 		 * dead, then count it and its size. Unlike lazy_scan_heap, we can
128 		 * afford to ignore problems and special cases.
129 		 */
130 		maxoff = PageGetMaxOffsetNumber(page);
131 
132 		for (offnum = FirstOffsetNumber;
133 			 offnum <= maxoff;
134 			 offnum = OffsetNumberNext(offnum))
135 		{
136 			ItemId		itemid;
137 			HeapTupleData tuple;
138 
139 			itemid = PageGetItemId(page, offnum);
140 
141 			if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid) ||
142 				ItemIdIsDead(itemid))
143 			{
144 				continue;
145 			}
146 
147 			Assert(ItemIdIsNormal(itemid));
148 
149 			ItemPointerSet(&(tuple.t_self), blkno, offnum);
150 
151 			tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
152 			tuple.t_len = ItemIdGetLength(itemid);
153 			tuple.t_tableOid = RelationGetRelid(rel);
154 
155 			/*
156 			 * We count live and dead tuples, but we also need to add up
157 			 * others in order to feed vac_estimate_reltuples.
158 			 */
159 			switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
160 			{
161 				case HEAPTUPLE_RECENTLY_DEAD:
162 					misc_count++;
163 					/* Fall through */
164 				case HEAPTUPLE_DEAD:
165 					stat->dead_tuple_len += tuple.t_len;
166 					stat->dead_tuple_count++;
167 					break;
168 				case HEAPTUPLE_LIVE:
169 					stat->tuple_len += tuple.t_len;
170 					stat->tuple_count++;
171 					break;
172 				case HEAPTUPLE_INSERT_IN_PROGRESS:
173 				case HEAPTUPLE_DELETE_IN_PROGRESS:
174 					misc_count++;
175 					break;
176 				default:
177 					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
178 					break;
179 			}
180 		}
181 
182 		UnlockReleaseBuffer(buf);
183 	}
184 
185 	stat->table_len = (uint64) nblocks * BLCKSZ;
186 
187 	stat->tuple_count = vac_estimate_reltuples(rel, false, nblocks, scanned,
188 											   stat->tuple_count + misc_count);
189 
190 	/*
191 	 * Calculate percentages if the relation has one or more pages.
192 	 */
193 	if (nblocks != 0)
194 	{
195 		stat->scanned_percent = 100 * scanned / nblocks;
196 		stat->tuple_percent = 100.0 * stat->tuple_len / stat->table_len;
197 		stat->dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len;
198 		stat->free_percent = 100.0 * stat->free_space / stat->table_len;
199 	}
200 
201 	if (BufferIsValid(vmbuffer))
202 	{
203 		ReleaseBuffer(vmbuffer);
204 		vmbuffer = InvalidBuffer;
205 	}
206 }
207 
208 /*
209  * Returns estimated live/dead tuple statistics for the given relid.
210  *
211  * The superuser() check here must be kept as the library might be upgraded
212  * without the extension being upgraded, meaning that in pre-1.5 installations
213  * these functions could be called by any user.
214  */
215 Datum
pgstattuple_approx(PG_FUNCTION_ARGS)216 pgstattuple_approx(PG_FUNCTION_ARGS)
217 {
218 	Oid			relid = PG_GETARG_OID(0);
219 
220 	if (!superuser())
221 		ereport(ERROR,
222 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
223 				 (errmsg("must be superuser to use pgstattuple functions"))));
224 
225 	PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo));
226 }
227 
228 /*
229  * As of pgstattuple version 1.5, we no longer need to check if the user
230  * is a superuser because we REVOKE EXECUTE on the SQL function from PUBLIC.
231  * Users can then grant access to it based on their policies.
232  *
233  * Otherwise identical to pgstattuple_approx (above).
234  */
235 Datum
pgstattuple_approx_v1_5(PG_FUNCTION_ARGS)236 pgstattuple_approx_v1_5(PG_FUNCTION_ARGS)
237 {
238 	Oid			relid = PG_GETARG_OID(0);
239 
240 	PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo));
241 }
242 
243 Datum
pgstattuple_approx_internal(Oid relid,FunctionCallInfo fcinfo)244 pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo)
245 {
246 	Relation	rel;
247 	output_type stat = {0};
248 	TupleDesc	tupdesc;
249 	bool		nulls[NUM_OUTPUT_COLUMNS];
250 	Datum		values[NUM_OUTPUT_COLUMNS];
251 	HeapTuple	ret;
252 	int			i = 0;
253 
254 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
255 		elog(ERROR, "return type must be a row type");
256 
257 	if (tupdesc->natts != NUM_OUTPUT_COLUMNS)
258 		elog(ERROR, "incorrect number of output arguments");
259 
260 	rel = relation_open(relid, AccessShareLock);
261 
262 	/*
263 	 * Reject attempts to read non-local temporary relations; we would be
264 	 * likely to get wrong data since we have no visibility into the owning
265 	 * session's local buffers.
266 	 */
267 	if (RELATION_IS_OTHER_TEMP(rel))
268 		ereport(ERROR,
269 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
270 				 errmsg("cannot access temporary tables of other sessions")));
271 
272 	/*
273 	 * We support only ordinary relations and materialised views, because we
274 	 * depend on the visibility map and free space map for our estimates about
275 	 * unscanned pages.
276 	 */
277 	if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
278 		  rel->rd_rel->relkind == RELKIND_MATVIEW))
279 		ereport(ERROR,
280 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
281 				 errmsg("\"%s\" is not a table or materialized view",
282 						RelationGetRelationName(rel))));
283 
284 	statapprox_heap(rel, &stat);
285 
286 	relation_close(rel, AccessShareLock);
287 
288 	memset(nulls, 0, sizeof(nulls));
289 
290 	values[i++] = Int64GetDatum(stat.table_len);
291 	values[i++] = Float8GetDatum(stat.scanned_percent);
292 	values[i++] = Int64GetDatum(stat.tuple_count);
293 	values[i++] = Int64GetDatum(stat.tuple_len);
294 	values[i++] = Float8GetDatum(stat.tuple_percent);
295 	values[i++] = Int64GetDatum(stat.dead_tuple_count);
296 	values[i++] = Int64GetDatum(stat.dead_tuple_len);
297 	values[i++] = Float8GetDatum(stat.dead_tuple_percent);
298 	values[i++] = Int64GetDatum(stat.free_space);
299 	values[i++] = Float8GetDatum(stat.free_percent);
300 
301 	ret = heap_form_tuple(tupdesc, values, nulls);
302 	return HeapTupleGetDatum(ret);
303 }
304