1 /*-------------------------------------------------------------------------
2  *
3  * pgstatapprox.c
4  *		  Bloat estimation functions
5  *
6  * Copyright (c) 2014-2016, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *		  contrib/pgstattuple/pgstatapprox.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "access/visibilitymap.h"
16 #include "access/transam.h"
17 #include "access/xact.h"
18 #include "access/multixact.h"
19 #include "access/htup_details.h"
20 #include "catalog/namespace.h"
21 #include "funcapi.h"
22 #include "miscadmin.h"
23 #include "storage/bufmgr.h"
24 #include "storage/freespace.h"
25 #include "storage/procarray.h"
26 #include "storage/lmgr.h"
27 #include "utils/builtins.h"
28 #include "utils/tqual.h"
29 #include "commands/vacuum.h"
30 
31 PG_FUNCTION_INFO_V1(pgstattuple_approx);
32 
33 typedef struct output_type
34 {
35 	uint64		table_len;
36 	uint64		scanned_percent;
37 	uint64		tuple_count;
38 	uint64		tuple_len;
39 	double		tuple_percent;
40 	uint64		dead_tuple_count;
41 	uint64		dead_tuple_len;
42 	double		dead_tuple_percent;
43 	uint64		free_space;
44 	double		free_percent;
45 } output_type;
46 
47 #define NUM_OUTPUT_COLUMNS 10
48 
49 /*
50  * This function takes an already open relation and scans its pages,
51  * skipping those that have the corresponding visibility map bit set.
52  * For pages we skip, we find the free space from the free space map
53  * and approximate tuple_len on that basis. For the others, we count
54  * the exact number of dead tuples etc.
55  *
56  * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but
57  * we do not try to avoid skipping single pages.
58  */
59 static void
statapprox_heap(Relation rel,output_type * stat)60 statapprox_heap(Relation rel, output_type *stat)
61 {
62 	BlockNumber scanned,
63 				nblocks,
64 				blkno;
65 	Buffer		vmbuffer = InvalidBuffer;
66 	BufferAccessStrategy bstrategy;
67 	TransactionId OldestXmin;
68 	uint64		misc_count = 0;
69 
70 	OldestXmin = GetOldestXmin(rel, true);
71 	bstrategy = GetAccessStrategy(BAS_BULKREAD);
72 
73 	nblocks = RelationGetNumberOfBlocks(rel);
74 	scanned = 0;
75 
76 	for (blkno = 0; blkno < nblocks; blkno++)
77 	{
78 		Buffer		buf;
79 		Page		page;
80 		OffsetNumber offnum,
81 					maxoff;
82 		Size		freespace;
83 
84 		CHECK_FOR_INTERRUPTS();
85 
86 		/*
87 		 * If the page has only visible tuples, then we can find out the free
88 		 * space from the FSM and move on.
89 		 */
90 		if (VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
91 		{
92 			freespace = GetRecordedFreeSpace(rel, blkno);
93 			stat->tuple_len += BLCKSZ - freespace;
94 			stat->free_space += freespace;
95 			continue;
96 		}
97 
98 		buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
99 								 RBM_NORMAL, bstrategy);
100 
101 		LockBuffer(buf, BUFFER_LOCK_SHARE);
102 
103 		page = BufferGetPage(buf);
104 
105 		/*
106 		 * It's not safe to call PageGetHeapFreeSpace() on new pages, so we
107 		 * treat them as being free space for our purposes.
108 		 */
109 		if (!PageIsNew(page))
110 			stat->free_space += PageGetHeapFreeSpace(page);
111 		else
112 			stat->free_space += BLCKSZ - SizeOfPageHeaderData;
113 
114 		if (PageIsNew(page) || PageIsEmpty(page))
115 		{
116 			UnlockReleaseBuffer(buf);
117 			continue;
118 		}
119 
120 		scanned++;
121 
122 		/*
123 		 * Look at each tuple on the page and decide whether it's live or
124 		 * dead, then count it and its size. Unlike lazy_scan_heap, we can
125 		 * afford to ignore problems and special cases.
126 		 */
127 		maxoff = PageGetMaxOffsetNumber(page);
128 
129 		for (offnum = FirstOffsetNumber;
130 			 offnum <= maxoff;
131 			 offnum = OffsetNumberNext(offnum))
132 		{
133 			ItemId		itemid;
134 			HeapTupleData tuple;
135 
136 			itemid = PageGetItemId(page, offnum);
137 
138 			if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid) ||
139 				ItemIdIsDead(itemid))
140 			{
141 				continue;
142 			}
143 
144 			Assert(ItemIdIsNormal(itemid));
145 
146 			ItemPointerSet(&(tuple.t_self), blkno, offnum);
147 
148 			tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
149 			tuple.t_len = ItemIdGetLength(itemid);
150 			tuple.t_tableOid = RelationGetRelid(rel);
151 
152 			/*
153 			 * We count live and dead tuples, but we also need to add up
154 			 * others in order to feed vac_estimate_reltuples.
155 			 */
156 			switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
157 			{
158 				case HEAPTUPLE_RECENTLY_DEAD:
159 					misc_count++;
160 					/* Fall through */
161 				case HEAPTUPLE_DEAD:
162 					stat->dead_tuple_len += tuple.t_len;
163 					stat->dead_tuple_count++;
164 					break;
165 				case HEAPTUPLE_LIVE:
166 					stat->tuple_len += tuple.t_len;
167 					stat->tuple_count++;
168 					break;
169 				case HEAPTUPLE_INSERT_IN_PROGRESS:
170 				case HEAPTUPLE_DELETE_IN_PROGRESS:
171 					misc_count++;
172 					break;
173 				default:
174 					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
175 					break;
176 			}
177 		}
178 
179 		UnlockReleaseBuffer(buf);
180 	}
181 
182 	stat->table_len = (uint64) nblocks *BLCKSZ;
183 
184 	stat->tuple_count = vac_estimate_reltuples(rel, false, nblocks, scanned,
185 											 stat->tuple_count + misc_count);
186 
187 	/*
188 	 * Calculate percentages if the relation has one or more pages.
189 	 */
190 	if (nblocks != 0)
191 	{
192 		stat->scanned_percent = 100 * scanned / nblocks;
193 		stat->tuple_percent = 100.0 * stat->tuple_len / stat->table_len;
194 		stat->dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len;
195 		stat->free_percent = 100.0 * stat->free_space / stat->table_len;
196 	}
197 
198 	if (BufferIsValid(vmbuffer))
199 	{
200 		ReleaseBuffer(vmbuffer);
201 		vmbuffer = InvalidBuffer;
202 	}
203 }
204 
205 /*
206  * Returns estimated live/dead tuple statistics for the given relid.
207  */
208 Datum
pgstattuple_approx(PG_FUNCTION_ARGS)209 pgstattuple_approx(PG_FUNCTION_ARGS)
210 {
211 	Oid			relid = PG_GETARG_OID(0);
212 	Relation	rel;
213 	output_type stat = {0};
214 	TupleDesc	tupdesc;
215 	bool		nulls[NUM_OUTPUT_COLUMNS];
216 	Datum		values[NUM_OUTPUT_COLUMNS];
217 	HeapTuple	ret;
218 	int			i = 0;
219 
220 	if (!superuser())
221 		ereport(ERROR,
222 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
223 				 (errmsg("must be superuser to use pgstattuple functions"))));
224 
225 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
226 		elog(ERROR, "return type must be a row type");
227 
228 	if (tupdesc->natts != NUM_OUTPUT_COLUMNS)
229 		elog(ERROR, "incorrect number of output arguments");
230 
231 	rel = relation_open(relid, AccessShareLock);
232 
233 	/*
234 	 * Reject attempts to read non-local temporary relations; we would be
235 	 * likely to get wrong data since we have no visibility into the owning
236 	 * session's local buffers.
237 	 */
238 	if (RELATION_IS_OTHER_TEMP(rel))
239 		ereport(ERROR,
240 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
241 				 errmsg("cannot access temporary tables of other sessions")));
242 
243 	/*
244 	 * We support only ordinary relations and materialised views, because we
245 	 * depend on the visibility map and free space map for our estimates about
246 	 * unscanned pages.
247 	 */
248 	if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
249 		  rel->rd_rel->relkind == RELKIND_MATVIEW))
250 		ereport(ERROR,
251 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
252 				 errmsg("\"%s\" is not a table or materialized view",
253 						RelationGetRelationName(rel))));
254 
255 	statapprox_heap(rel, &stat);
256 
257 	relation_close(rel, AccessShareLock);
258 
259 	memset(nulls, 0, sizeof(nulls));
260 
261 	values[i++] = Int64GetDatum(stat.table_len);
262 	values[i++] = Float8GetDatum(stat.scanned_percent);
263 	values[i++] = Int64GetDatum(stat.tuple_count);
264 	values[i++] = Int64GetDatum(stat.tuple_len);
265 	values[i++] = Float8GetDatum(stat.tuple_percent);
266 	values[i++] = Int64GetDatum(stat.dead_tuple_count);
267 	values[i++] = Int64GetDatum(stat.dead_tuple_len);
268 	values[i++] = Float8GetDatum(stat.dead_tuple_percent);
269 	values[i++] = Int64GetDatum(stat.free_space);
270 	values[i++] = Float8GetDatum(stat.free_percent);
271 
272 	ret = heap_form_tuple(tupdesc, values, nulls);
273 	return HeapTupleGetDatum(ret);
274 }
275