1 /*-------------------------------------------------------------------------
2  *
3  * instrument.c
4  *	 functions for instrumentation of plan execution
5  *
6  *
7  * Copyright (c) 2001-2020, PostgreSQL Global Development Group
8  *
9  * IDENTIFICATION
10  *	  src/backend/executor/instrument.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include <unistd.h>
17 
18 #include "executor/instrument.h"
19 
20 BufferUsage pgBufferUsage;
21 static BufferUsage save_pgBufferUsage;
22 WalUsage	pgWalUsage;
23 static WalUsage save_pgWalUsage;
24 
25 static void BufferUsageAdd(BufferUsage *dst, const BufferUsage *add);
26 static void WalUsageAdd(WalUsage *dst, WalUsage *add);
27 
28 
29 /* Allocate new instrumentation structure(s) */
30 Instrumentation *
InstrAlloc(int n,int instrument_options)31 InstrAlloc(int n, int instrument_options)
32 {
33 	Instrumentation *instr;
34 
35 	/* initialize all fields to zeroes, then modify as needed */
36 	instr = palloc0(n * sizeof(Instrumentation));
37 	if (instrument_options & (INSTRUMENT_BUFFERS | INSTRUMENT_TIMER | INSTRUMENT_WAL))
38 	{
39 		bool		need_buffers = (instrument_options & INSTRUMENT_BUFFERS) != 0;
40 		bool		need_wal = (instrument_options & INSTRUMENT_WAL) != 0;
41 		bool		need_timer = (instrument_options & INSTRUMENT_TIMER) != 0;
42 		int			i;
43 
44 		for (i = 0; i < n; i++)
45 		{
46 			instr[i].need_bufusage = need_buffers;
47 			instr[i].need_walusage = need_wal;
48 			instr[i].need_timer = need_timer;
49 		}
50 	}
51 
52 	return instr;
53 }
54 
55 /* Initialize a pre-allocated instrumentation structure. */
56 void
InstrInit(Instrumentation * instr,int instrument_options)57 InstrInit(Instrumentation *instr, int instrument_options)
58 {
59 	memset(instr, 0, sizeof(Instrumentation));
60 	instr->need_bufusage = (instrument_options & INSTRUMENT_BUFFERS) != 0;
61 	instr->need_walusage = (instrument_options & INSTRUMENT_WAL) != 0;
62 	instr->need_timer = (instrument_options & INSTRUMENT_TIMER) != 0;
63 }
64 
65 /* Entry to a plan node */
66 void
InstrStartNode(Instrumentation * instr)67 InstrStartNode(Instrumentation *instr)
68 {
69 	if (instr->need_timer &&
70 		!INSTR_TIME_SET_CURRENT_LAZY(instr->starttime))
71 		elog(ERROR, "InstrStartNode called twice in a row");
72 
73 	/* save buffer usage totals at node entry, if needed */
74 	if (instr->need_bufusage)
75 		instr->bufusage_start = pgBufferUsage;
76 
77 	if (instr->need_walusage)
78 		instr->walusage_start = pgWalUsage;
79 }
80 
81 /* Exit from a plan node */
82 void
InstrStopNode(Instrumentation * instr,double nTuples)83 InstrStopNode(Instrumentation *instr, double nTuples)
84 {
85 	instr_time	endtime;
86 
87 	/* count the returned tuples */
88 	instr->tuplecount += nTuples;
89 
90 	/* let's update the time only if the timer was requested */
91 	if (instr->need_timer)
92 	{
93 		if (INSTR_TIME_IS_ZERO(instr->starttime))
94 			elog(ERROR, "InstrStopNode called without start");
95 
96 		INSTR_TIME_SET_CURRENT(endtime);
97 		INSTR_TIME_ACCUM_DIFF(instr->counter, endtime, instr->starttime);
98 
99 		INSTR_TIME_SET_ZERO(instr->starttime);
100 	}
101 
102 	/* Add delta of buffer usage since entry to node's totals */
103 	if (instr->need_bufusage)
104 		BufferUsageAccumDiff(&instr->bufusage,
105 							 &pgBufferUsage, &instr->bufusage_start);
106 
107 	if (instr->need_walusage)
108 		WalUsageAccumDiff(&instr->walusage,
109 						  &pgWalUsage, &instr->walusage_start);
110 
111 	/* Is this the first tuple of this cycle? */
112 	if (!instr->running)
113 	{
114 		instr->running = true;
115 		instr->firsttuple = INSTR_TIME_GET_DOUBLE(instr->counter);
116 	}
117 }
118 
119 /* Finish a run cycle for a plan node */
120 void
InstrEndLoop(Instrumentation * instr)121 InstrEndLoop(Instrumentation *instr)
122 {
123 	double		totaltime;
124 
125 	/* Skip if nothing has happened, or already shut down */
126 	if (!instr->running)
127 		return;
128 
129 	if (!INSTR_TIME_IS_ZERO(instr->starttime))
130 		elog(ERROR, "InstrEndLoop called on running node");
131 
132 	/* Accumulate per-cycle statistics into totals */
133 	totaltime = INSTR_TIME_GET_DOUBLE(instr->counter);
134 
135 	instr->startup += instr->firsttuple;
136 	instr->total += totaltime;
137 	instr->ntuples += instr->tuplecount;
138 	instr->nloops += 1;
139 
140 	/* Reset for next cycle (if any) */
141 	instr->running = false;
142 	INSTR_TIME_SET_ZERO(instr->starttime);
143 	INSTR_TIME_SET_ZERO(instr->counter);
144 	instr->firsttuple = 0;
145 	instr->tuplecount = 0;
146 }
147 
148 /* aggregate instrumentation information */
149 void
InstrAggNode(Instrumentation * dst,Instrumentation * add)150 InstrAggNode(Instrumentation *dst, Instrumentation *add)
151 {
152 	if (!dst->running && add->running)
153 	{
154 		dst->running = true;
155 		dst->firsttuple = add->firsttuple;
156 	}
157 	else if (dst->running && add->running && dst->firsttuple > add->firsttuple)
158 		dst->firsttuple = add->firsttuple;
159 
160 	INSTR_TIME_ADD(dst->counter, add->counter);
161 
162 	dst->tuplecount += add->tuplecount;
163 	dst->startup += add->startup;
164 	dst->total += add->total;
165 	dst->ntuples += add->ntuples;
166 	dst->ntuples2 += add->ntuples2;
167 	dst->nloops += add->nloops;
168 	dst->nfiltered1 += add->nfiltered1;
169 	dst->nfiltered2 += add->nfiltered2;
170 
171 	/* Add delta of buffer usage since entry to node's totals */
172 	if (dst->need_bufusage)
173 		BufferUsageAdd(&dst->bufusage, &add->bufusage);
174 
175 	if (dst->need_walusage)
176 		WalUsageAdd(&dst->walusage, &add->walusage);
177 }
178 
179 /* note current values during parallel executor startup */
180 void
InstrStartParallelQuery(void)181 InstrStartParallelQuery(void)
182 {
183 	save_pgBufferUsage = pgBufferUsage;
184 	save_pgWalUsage = pgWalUsage;
185 }
186 
187 /* report usage after parallel executor shutdown */
188 void
InstrEndParallelQuery(BufferUsage * bufusage,WalUsage * walusage)189 InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
190 {
191 	memset(bufusage, 0, sizeof(BufferUsage));
192 	BufferUsageAccumDiff(bufusage, &pgBufferUsage, &save_pgBufferUsage);
193 	memset(walusage, 0, sizeof(WalUsage));
194 	WalUsageAccumDiff(walusage, &pgWalUsage, &save_pgWalUsage);
195 }
196 
197 /* accumulate work done by workers in leader's stats */
198 void
InstrAccumParallelQuery(BufferUsage * bufusage,WalUsage * walusage)199 InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
200 {
201 	BufferUsageAdd(&pgBufferUsage, bufusage);
202 	WalUsageAdd(&pgWalUsage, walusage);
203 }
204 
205 /* dst += add */
206 static void
BufferUsageAdd(BufferUsage * dst,const BufferUsage * add)207 BufferUsageAdd(BufferUsage *dst, const BufferUsage *add)
208 {
209 	dst->shared_blks_hit += add->shared_blks_hit;
210 	dst->shared_blks_read += add->shared_blks_read;
211 	dst->shared_blks_dirtied += add->shared_blks_dirtied;
212 	dst->shared_blks_written += add->shared_blks_written;
213 	dst->local_blks_hit += add->local_blks_hit;
214 	dst->local_blks_read += add->local_blks_read;
215 	dst->local_blks_dirtied += add->local_blks_dirtied;
216 	dst->local_blks_written += add->local_blks_written;
217 	dst->temp_blks_read += add->temp_blks_read;
218 	dst->temp_blks_written += add->temp_blks_written;
219 	INSTR_TIME_ADD(dst->blk_read_time, add->blk_read_time);
220 	INSTR_TIME_ADD(dst->blk_write_time, add->blk_write_time);
221 }
222 
223 /* dst += add - sub */
224 void
BufferUsageAccumDiff(BufferUsage * dst,const BufferUsage * add,const BufferUsage * sub)225 BufferUsageAccumDiff(BufferUsage *dst,
226 					 const BufferUsage *add,
227 					 const BufferUsage *sub)
228 {
229 	dst->shared_blks_hit += add->shared_blks_hit - sub->shared_blks_hit;
230 	dst->shared_blks_read += add->shared_blks_read - sub->shared_blks_read;
231 	dst->shared_blks_dirtied += add->shared_blks_dirtied - sub->shared_blks_dirtied;
232 	dst->shared_blks_written += add->shared_blks_written - sub->shared_blks_written;
233 	dst->local_blks_hit += add->local_blks_hit - sub->local_blks_hit;
234 	dst->local_blks_read += add->local_blks_read - sub->local_blks_read;
235 	dst->local_blks_dirtied += add->local_blks_dirtied - sub->local_blks_dirtied;
236 	dst->local_blks_written += add->local_blks_written - sub->local_blks_written;
237 	dst->temp_blks_read += add->temp_blks_read - sub->temp_blks_read;
238 	dst->temp_blks_written += add->temp_blks_written - sub->temp_blks_written;
239 	INSTR_TIME_ACCUM_DIFF(dst->blk_read_time,
240 						  add->blk_read_time, sub->blk_read_time);
241 	INSTR_TIME_ACCUM_DIFF(dst->blk_write_time,
242 						  add->blk_write_time, sub->blk_write_time);
243 }
244 
245 /* helper functions for WAL usage accumulation */
246 static void
WalUsageAdd(WalUsage * dst,WalUsage * add)247 WalUsageAdd(WalUsage *dst, WalUsage *add)
248 {
249 	dst->wal_bytes += add->wal_bytes;
250 	dst->wal_records += add->wal_records;
251 	dst->wal_fpi += add->wal_fpi;
252 }
253 
254 void
WalUsageAccumDiff(WalUsage * dst,const WalUsage * add,const WalUsage * sub)255 WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
256 {
257 	dst->wal_bytes += add->wal_bytes - sub->wal_bytes;
258 	dst->wal_records += add->wal_records - sub->wal_records;
259 	dst->wal_fpi += add->wal_fpi - sub->wal_fpi;
260 }
261