1 /*-------------------------------------------------------------------------
2 *
3 * pgstatapprox.c
4 * Bloat estimation functions
5 *
6 * Copyright (c) 2014-2016, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * contrib/pgstattuple/pgstatapprox.c
10 *
11 *-------------------------------------------------------------------------
12 */
13 #include "postgres.h"
14
15 #include "access/visibilitymap.h"
16 #include "access/transam.h"
17 #include "access/xact.h"
18 #include "access/multixact.h"
19 #include "access/htup_details.h"
20 #include "catalog/namespace.h"
21 #include "funcapi.h"
22 #include "miscadmin.h"
23 #include "storage/bufmgr.h"
24 #include "storage/freespace.h"
25 #include "storage/procarray.h"
26 #include "storage/lmgr.h"
27 #include "utils/builtins.h"
28 #include "utils/tqual.h"
29 #include "commands/vacuum.h"
30
31 PG_FUNCTION_INFO_V1(pgstattuple_approx);
32
33 typedef struct output_type
34 {
35 uint64 table_len;
36 uint64 scanned_percent;
37 uint64 tuple_count;
38 uint64 tuple_len;
39 double tuple_percent;
40 uint64 dead_tuple_count;
41 uint64 dead_tuple_len;
42 double dead_tuple_percent;
43 uint64 free_space;
44 double free_percent;
45 } output_type;
46
47 #define NUM_OUTPUT_COLUMNS 10
48
49 /*
50 * This function takes an already open relation and scans its pages,
51 * skipping those that have the corresponding visibility map bit set.
52 * For pages we skip, we find the free space from the free space map
53 * and approximate tuple_len on that basis. For the others, we count
54 * the exact number of dead tuples etc.
55 *
56 * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but
57 * we do not try to avoid skipping single pages.
58 */
59 static void
statapprox_heap(Relation rel,output_type * stat)60 statapprox_heap(Relation rel, output_type *stat)
61 {
62 BlockNumber scanned,
63 nblocks,
64 blkno;
65 Buffer vmbuffer = InvalidBuffer;
66 BufferAccessStrategy bstrategy;
67 TransactionId OldestXmin;
68 uint64 misc_count = 0;
69
70 OldestXmin = GetOldestXmin(rel, true);
71 bstrategy = GetAccessStrategy(BAS_BULKREAD);
72
73 nblocks = RelationGetNumberOfBlocks(rel);
74 scanned = 0;
75
76 for (blkno = 0; blkno < nblocks; blkno++)
77 {
78 Buffer buf;
79 Page page;
80 OffsetNumber offnum,
81 maxoff;
82 Size freespace;
83
84 CHECK_FOR_INTERRUPTS();
85
86 /*
87 * If the page has only visible tuples, then we can find out the free
88 * space from the FSM and move on.
89 */
90 if (VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
91 {
92 freespace = GetRecordedFreeSpace(rel, blkno);
93 stat->tuple_len += BLCKSZ - freespace;
94 stat->free_space += freespace;
95 continue;
96 }
97
98 buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
99 RBM_NORMAL, bstrategy);
100
101 LockBuffer(buf, BUFFER_LOCK_SHARE);
102
103 page = BufferGetPage(buf);
104
105 /*
106 * It's not safe to call PageGetHeapFreeSpace() on new pages, so we
107 * treat them as being free space for our purposes.
108 */
109 if (!PageIsNew(page))
110 stat->free_space += PageGetHeapFreeSpace(page);
111 else
112 stat->free_space += BLCKSZ - SizeOfPageHeaderData;
113
114 if (PageIsNew(page) || PageIsEmpty(page))
115 {
116 UnlockReleaseBuffer(buf);
117 continue;
118 }
119
120 scanned++;
121
122 /*
123 * Look at each tuple on the page and decide whether it's live or
124 * dead, then count it and its size. Unlike lazy_scan_heap, we can
125 * afford to ignore problems and special cases.
126 */
127 maxoff = PageGetMaxOffsetNumber(page);
128
129 for (offnum = FirstOffsetNumber;
130 offnum <= maxoff;
131 offnum = OffsetNumberNext(offnum))
132 {
133 ItemId itemid;
134 HeapTupleData tuple;
135
136 itemid = PageGetItemId(page, offnum);
137
138 if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid) ||
139 ItemIdIsDead(itemid))
140 {
141 continue;
142 }
143
144 Assert(ItemIdIsNormal(itemid));
145
146 ItemPointerSet(&(tuple.t_self), blkno, offnum);
147
148 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
149 tuple.t_len = ItemIdGetLength(itemid);
150 tuple.t_tableOid = RelationGetRelid(rel);
151
152 /*
153 * We count live and dead tuples, but we also need to add up
154 * others in order to feed vac_estimate_reltuples.
155 */
156 switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
157 {
158 case HEAPTUPLE_RECENTLY_DEAD:
159 misc_count++;
160 /* Fall through */
161 case HEAPTUPLE_DEAD:
162 stat->dead_tuple_len += tuple.t_len;
163 stat->dead_tuple_count++;
164 break;
165 case HEAPTUPLE_LIVE:
166 stat->tuple_len += tuple.t_len;
167 stat->tuple_count++;
168 break;
169 case HEAPTUPLE_INSERT_IN_PROGRESS:
170 case HEAPTUPLE_DELETE_IN_PROGRESS:
171 misc_count++;
172 break;
173 default:
174 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
175 break;
176 }
177 }
178
179 UnlockReleaseBuffer(buf);
180 }
181
182 stat->table_len = (uint64) nblocks *BLCKSZ;
183
184 stat->tuple_count = vac_estimate_reltuples(rel, false, nblocks, scanned,
185 stat->tuple_count + misc_count);
186
187 /*
188 * Calculate percentages if the relation has one or more pages.
189 */
190 if (nblocks != 0)
191 {
192 stat->scanned_percent = 100 * scanned / nblocks;
193 stat->tuple_percent = 100.0 * stat->tuple_len / stat->table_len;
194 stat->dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len;
195 stat->free_percent = 100.0 * stat->free_space / stat->table_len;
196 }
197
198 if (BufferIsValid(vmbuffer))
199 {
200 ReleaseBuffer(vmbuffer);
201 vmbuffer = InvalidBuffer;
202 }
203 }
204
205 /*
206 * Returns estimated live/dead tuple statistics for the given relid.
207 */
208 Datum
pgstattuple_approx(PG_FUNCTION_ARGS)209 pgstattuple_approx(PG_FUNCTION_ARGS)
210 {
211 Oid relid = PG_GETARG_OID(0);
212 Relation rel;
213 output_type stat = {0};
214 TupleDesc tupdesc;
215 bool nulls[NUM_OUTPUT_COLUMNS];
216 Datum values[NUM_OUTPUT_COLUMNS];
217 HeapTuple ret;
218 int i = 0;
219
220 if (!superuser())
221 ereport(ERROR,
222 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
223 (errmsg("must be superuser to use pgstattuple functions"))));
224
225 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
226 elog(ERROR, "return type must be a row type");
227
228 if (tupdesc->natts != NUM_OUTPUT_COLUMNS)
229 elog(ERROR, "incorrect number of output arguments");
230
231 rel = relation_open(relid, AccessShareLock);
232
233 /*
234 * Reject attempts to read non-local temporary relations; we would be
235 * likely to get wrong data since we have no visibility into the owning
236 * session's local buffers.
237 */
238 if (RELATION_IS_OTHER_TEMP(rel))
239 ereport(ERROR,
240 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
241 errmsg("cannot access temporary tables of other sessions")));
242
243 /*
244 * We support only ordinary relations and materialised views, because we
245 * depend on the visibility map and free space map for our estimates about
246 * unscanned pages.
247 */
248 if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
249 rel->rd_rel->relkind == RELKIND_MATVIEW))
250 ereport(ERROR,
251 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
252 errmsg("\"%s\" is not a table or materialized view",
253 RelationGetRelationName(rel))));
254
255 statapprox_heap(rel, &stat);
256
257 relation_close(rel, AccessShareLock);
258
259 memset(nulls, 0, sizeof(nulls));
260
261 values[i++] = Int64GetDatum(stat.table_len);
262 values[i++] = Float8GetDatum(stat.scanned_percent);
263 values[i++] = Int64GetDatum(stat.tuple_count);
264 values[i++] = Int64GetDatum(stat.tuple_len);
265 values[i++] = Float8GetDatum(stat.tuple_percent);
266 values[i++] = Int64GetDatum(stat.dead_tuple_count);
267 values[i++] = Int64GetDatum(stat.dead_tuple_len);
268 values[i++] = Float8GetDatum(stat.dead_tuple_percent);
269 values[i++] = Int64GetDatum(stat.free_space);
270 values[i++] = Float8GetDatum(stat.free_percent);
271
272 ret = heap_form_tuple(tupdesc, values, nulls);
273 return HeapTupleGetDatum(ret);
274 }
275