1 /*-------------------------------------------------------------------------
2 *
3 * pgstatapprox.c
4 * Bloat estimation functions
5 *
6 * Copyright (c) 2014-2017, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * contrib/pgstattuple/pgstatapprox.c
10 *
11 *-------------------------------------------------------------------------
12 */
13 #include "postgres.h"
14
15 #include "access/visibilitymap.h"
16 #include "access/transam.h"
17 #include "access/xact.h"
18 #include "access/multixact.h"
19 #include "access/htup_details.h"
20 #include "catalog/namespace.h"
21 #include "funcapi.h"
22 #include "miscadmin.h"
23 #include "storage/bufmgr.h"
24 #include "storage/freespace.h"
25 #include "storage/procarray.h"
26 #include "storage/lmgr.h"
27 #include "utils/builtins.h"
28 #include "utils/tqual.h"
29 #include "commands/vacuum.h"
30
31 PG_FUNCTION_INFO_V1(pgstattuple_approx);
32 PG_FUNCTION_INFO_V1(pgstattuple_approx_v1_5);
33
34 Datum pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo);
35
36 typedef struct output_type
37 {
38 uint64 table_len;
39 uint64 scanned_percent;
40 uint64 tuple_count;
41 uint64 tuple_len;
42 double tuple_percent;
43 uint64 dead_tuple_count;
44 uint64 dead_tuple_len;
45 double dead_tuple_percent;
46 uint64 free_space;
47 double free_percent;
48 } output_type;
49
50 #define NUM_OUTPUT_COLUMNS 10
51
52 /*
53 * This function takes an already open relation and scans its pages,
54 * skipping those that have the corresponding visibility map bit set.
55 * For pages we skip, we find the free space from the free space map
56 * and approximate tuple_len on that basis. For the others, we count
57 * the exact number of dead tuples etc.
58 *
59 * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but
60 * we do not try to avoid skipping single pages.
61 */
62 static void
statapprox_heap(Relation rel,output_type * stat)63 statapprox_heap(Relation rel, output_type *stat)
64 {
65 BlockNumber scanned,
66 nblocks,
67 blkno;
68 Buffer vmbuffer = InvalidBuffer;
69 BufferAccessStrategy bstrategy;
70 TransactionId OldestXmin;
71 uint64 misc_count = 0;
72
73 OldestXmin = GetOldestXmin(rel, PROCARRAY_FLAGS_VACUUM);
74 bstrategy = GetAccessStrategy(BAS_BULKREAD);
75
76 nblocks = RelationGetNumberOfBlocks(rel);
77 scanned = 0;
78
79 for (blkno = 0; blkno < nblocks; blkno++)
80 {
81 Buffer buf;
82 Page page;
83 OffsetNumber offnum,
84 maxoff;
85 Size freespace;
86
87 CHECK_FOR_INTERRUPTS();
88
89 /*
90 * If the page has only visible tuples, then we can find out the free
91 * space from the FSM and move on.
92 */
93 if (VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
94 {
95 freespace = GetRecordedFreeSpace(rel, blkno);
96 stat->tuple_len += BLCKSZ - freespace;
97 stat->free_space += freespace;
98 continue;
99 }
100
101 buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
102 RBM_NORMAL, bstrategy);
103
104 LockBuffer(buf, BUFFER_LOCK_SHARE);
105
106 page = BufferGetPage(buf);
107
108 /*
109 * It's not safe to call PageGetHeapFreeSpace() on new pages, so we
110 * treat them as being free space for our purposes.
111 */
112 if (!PageIsNew(page))
113 stat->free_space += PageGetHeapFreeSpace(page);
114 else
115 stat->free_space += BLCKSZ - SizeOfPageHeaderData;
116
117 if (PageIsNew(page) || PageIsEmpty(page))
118 {
119 UnlockReleaseBuffer(buf);
120 continue;
121 }
122
123 scanned++;
124
125 /*
126 * Look at each tuple on the page and decide whether it's live or
127 * dead, then count it and its size. Unlike lazy_scan_heap, we can
128 * afford to ignore problems and special cases.
129 */
130 maxoff = PageGetMaxOffsetNumber(page);
131
132 for (offnum = FirstOffsetNumber;
133 offnum <= maxoff;
134 offnum = OffsetNumberNext(offnum))
135 {
136 ItemId itemid;
137 HeapTupleData tuple;
138
139 itemid = PageGetItemId(page, offnum);
140
141 if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid) ||
142 ItemIdIsDead(itemid))
143 {
144 continue;
145 }
146
147 Assert(ItemIdIsNormal(itemid));
148
149 ItemPointerSet(&(tuple.t_self), blkno, offnum);
150
151 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
152 tuple.t_len = ItemIdGetLength(itemid);
153 tuple.t_tableOid = RelationGetRelid(rel);
154
155 /*
156 * We count live and dead tuples, but we also need to add up
157 * others in order to feed vac_estimate_reltuples.
158 */
159 switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
160 {
161 case HEAPTUPLE_RECENTLY_DEAD:
162 misc_count++;
163 /* Fall through */
164 case HEAPTUPLE_DEAD:
165 stat->dead_tuple_len += tuple.t_len;
166 stat->dead_tuple_count++;
167 break;
168 case HEAPTUPLE_LIVE:
169 stat->tuple_len += tuple.t_len;
170 stat->tuple_count++;
171 break;
172 case HEAPTUPLE_INSERT_IN_PROGRESS:
173 case HEAPTUPLE_DELETE_IN_PROGRESS:
174 misc_count++;
175 break;
176 default:
177 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
178 break;
179 }
180 }
181
182 UnlockReleaseBuffer(buf);
183 }
184
185 stat->table_len = (uint64) nblocks * BLCKSZ;
186
187 stat->tuple_count = vac_estimate_reltuples(rel, false, nblocks, scanned,
188 stat->tuple_count + misc_count);
189
190 /*
191 * Calculate percentages if the relation has one or more pages.
192 */
193 if (nblocks != 0)
194 {
195 stat->scanned_percent = 100 * scanned / nblocks;
196 stat->tuple_percent = 100.0 * stat->tuple_len / stat->table_len;
197 stat->dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len;
198 stat->free_percent = 100.0 * stat->free_space / stat->table_len;
199 }
200
201 if (BufferIsValid(vmbuffer))
202 {
203 ReleaseBuffer(vmbuffer);
204 vmbuffer = InvalidBuffer;
205 }
206 }
207
208 /*
209 * Returns estimated live/dead tuple statistics for the given relid.
210 *
211 * The superuser() check here must be kept as the library might be upgraded
212 * without the extension being upgraded, meaning that in pre-1.5 installations
213 * these functions could be called by any user.
214 */
215 Datum
pgstattuple_approx(PG_FUNCTION_ARGS)216 pgstattuple_approx(PG_FUNCTION_ARGS)
217 {
218 Oid relid = PG_GETARG_OID(0);
219
220 if (!superuser())
221 ereport(ERROR,
222 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
223 (errmsg("must be superuser to use pgstattuple functions"))));
224
225 PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo));
226 }
227
228 /*
229 * As of pgstattuple version 1.5, we no longer need to check if the user
230 * is a superuser because we REVOKE EXECUTE on the SQL function from PUBLIC.
231 * Users can then grant access to it based on their policies.
232 *
233 * Otherwise identical to pgstattuple_approx (above).
234 */
235 Datum
pgstattuple_approx_v1_5(PG_FUNCTION_ARGS)236 pgstattuple_approx_v1_5(PG_FUNCTION_ARGS)
237 {
238 Oid relid = PG_GETARG_OID(0);
239
240 PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo));
241 }
242
243 Datum
pgstattuple_approx_internal(Oid relid,FunctionCallInfo fcinfo)244 pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo)
245 {
246 Relation rel;
247 output_type stat = {0};
248 TupleDesc tupdesc;
249 bool nulls[NUM_OUTPUT_COLUMNS];
250 Datum values[NUM_OUTPUT_COLUMNS];
251 HeapTuple ret;
252 int i = 0;
253
254 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
255 elog(ERROR, "return type must be a row type");
256
257 if (tupdesc->natts != NUM_OUTPUT_COLUMNS)
258 elog(ERROR, "incorrect number of output arguments");
259
260 rel = relation_open(relid, AccessShareLock);
261
262 /*
263 * Reject attempts to read non-local temporary relations; we would be
264 * likely to get wrong data since we have no visibility into the owning
265 * session's local buffers.
266 */
267 if (RELATION_IS_OTHER_TEMP(rel))
268 ereport(ERROR,
269 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
270 errmsg("cannot access temporary tables of other sessions")));
271
272 /*
273 * We support only ordinary relations and materialised views, because we
274 * depend on the visibility map and free space map for our estimates about
275 * unscanned pages.
276 */
277 if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
278 rel->rd_rel->relkind == RELKIND_MATVIEW))
279 ereport(ERROR,
280 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
281 errmsg("\"%s\" is not a table or materialized view",
282 RelationGetRelationName(rel))));
283
284 statapprox_heap(rel, &stat);
285
286 relation_close(rel, AccessShareLock);
287
288 memset(nulls, 0, sizeof(nulls));
289
290 values[i++] = Int64GetDatum(stat.table_len);
291 values[i++] = Float8GetDatum(stat.scanned_percent);
292 values[i++] = Int64GetDatum(stat.tuple_count);
293 values[i++] = Int64GetDatum(stat.tuple_len);
294 values[i++] = Float8GetDatum(stat.tuple_percent);
295 values[i++] = Int64GetDatum(stat.dead_tuple_count);
296 values[i++] = Int64GetDatum(stat.dead_tuple_len);
297 values[i++] = Float8GetDatum(stat.dead_tuple_percent);
298 values[i++] = Int64GetDatum(stat.free_space);
299 values[i++] = Float8GetDatum(stat.free_percent);
300
301 ret = heap_form_tuple(tupdesc, values, nulls);
302 return HeapTupleGetDatum(ret);
303 }
304