1 /*****************************************************************************
2
3 Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2017, 2020, MariaDB Corporation.
5
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17
18 *****************************************************************************/
19
20 /**************************************************//**
21 @file ut/ut0stage.h
22 Supplementary code to performance schema stage instrumentation.
23
24 Created Nov 12, 2014 Vasil Dimov
25 *******************************************************/
26
27 #ifndef ut0stage_h
28 #define ut0stage_h
29
30 #include <algorithm>
31 #include <math.h>
32
33 #include "my_global.h" /* needed for headers from mysql/psi/ */
34
35 #include "mysql/psi/mysql_stage.h" /* mysql_stage_inc_work_completed */
36 #include "mysql/psi/psi.h" /* HAVE_PSI_STAGE_INTERFACE, PSI_stage_progress */
37
38 #include "dict0mem.h" /* dict_index_t */
39 #include "row0log.h" /* row_log_estimate_work() */
40 #include "srv0srv.h" /* ut_stage_alter_t */
41
42 #ifdef HAVE_PSI_STAGE_INTERFACE
43
44 /** Class used to report ALTER TABLE progress via performance_schema.
45 The only user of this class is the ALTER TABLE code and it calls the methods
46 in the following order
47 constructor
48 begin_phase_read_pk()
49 multiple times:
50 n_pk_recs_inc() // once per record read
51 inc() // once per page read
52 end_phase_read_pk()
53 if any new indexes are being added, for each one:
54 begin_phase_sort()
55 multiple times:
56 inc() // once per record sorted
57 begin_phase_insert()
58 multiple times:
59 inc() // once per record inserted
60 being_phase_log_index()
61 multiple times:
62 inc() // once per log-block applied
63 begin_phase_log_table()
64 multiple times:
65 inc() // once per log-block applied
66 begin_phase_end()
67 destructor
68
69 This class knows the specifics of each phase and tries to increment the
70 progress in an even manner across the entire ALTER TABLE lifetime. */
71 class ut_stage_alter_t {
72 public:
73 /** Constructor.
74 @param[in] pk primary key of the old table */
75 explicit
ut_stage_alter_t(const dict_index_t * pk)76 ut_stage_alter_t(
77 const dict_index_t* pk)
78 :
79 m_progress(NULL),
80 m_pk(pk),
81 m_n_pk_recs(0),
82 m_n_pk_pages(0),
83 m_n_recs_processed(0),
84 m_cur_phase(NOT_STARTED)
85 {
86 }
87
88 /** Destructor. */
89 ~ut_stage_alter_t();
90
91 /** Flag an ALTER TABLE start (read primary key phase).
92 @param[in] n_sort_indexes number of indexes that will be sorted
93 during ALTER TABLE, used for estimating the total work to be done */
94 void
95 begin_phase_read_pk(
96 ulint n_sort_indexes);
97
98 /** Increment the number of records in PK (table) with 1.
99 This is used to get more accurate estimate about the number of
100 records per page which is needed because some phases work on
101 per-page basis while some work on per-record basis and we want
102 to get the progress as even as possible. */
103 void
104 n_pk_recs_inc();
105
106 /** Flag either one record or one page processed, depending on the
107 current phase.
108 @param[in] inc_val flag this many units processed at once */
109 void
110 inc(
111 ulint inc_val = 1);
112
113 /** Flag the end of reading of the primary key.
114 Here we know the exact number of pages and records and calculate
115 the number of records per page and refresh the estimate. */
116 void
117 end_phase_read_pk();
118
119 /** Flag the beginning of the sort phase.
120 @param[in] sort_multi_factor since merge sort processes
121 one page more than once we only update the estimate once per this
122 many pages processed. */
123 void
124 begin_phase_sort(
125 double sort_multi_factor);
126
127 /** Flag the beginning of the insert phase. */
128 void
129 begin_phase_insert();
130
131 /** Flag the beginning of the log index phase. */
132 void
133 begin_phase_log_index();
134
135 /** Flag the beginning of the log table phase. */
136 void
137 begin_phase_log_table();
138
139 /** Flag the beginning of the end phase. */
140 void
141 begin_phase_end();
142
143 private:
144
145 /** Update the estimate of total work to be done. */
146 void
147 reestimate();
148
149 /** Change the current phase.
150 @param[in] new_stage pointer to the new stage to change to */
151 void
152 change_phase(
153 const PSI_stage_info* new_stage);
154
155 /** Performance schema accounting object. */
156 PSI_stage_progress* m_progress;
157
158 /** Old table PK. Used for calculating the estimate. */
159 const dict_index_t* m_pk;
160
161 /** Number of records in the primary key (table), including delete
162 marked records. */
163 ulint m_n_pk_recs;
164
165 /** Number of leaf pages in the primary key. */
166 ulint m_n_pk_pages;
167
168 /** Estimated number of records per page in the primary key. */
169 double m_n_recs_per_page;
170
171 /** Number of indexes that are being added. */
172 ulint m_n_sort_indexes;
173
174 /** During the sort phase, increment the counter once per this
175 many pages processed. This is because sort processes one page more
176 than once. */
177 ulint m_sort_multi_factor;
178
179 /** Number of records processed during sort & insert phases. We
180 need to increment the counter only once page, or once per
181 recs-per-page records. */
182 ulint m_n_recs_processed;
183
184 /** Current phase. */
185 enum {
186 NOT_STARTED = 0,
187 READ_PK = 1,
188 SORT = 2,
189 INSERT = 3,
190 /* JAN: TODO: MySQL 5.7 vrs. MariaDB sql/log.h
191 LOG_INDEX = 5,
192 LOG_TABLE = 6, */
193 LOG_INNODB_INDEX = 5,
194 LOG_INNODB_TABLE = 6,
195 END = 7,
196 } m_cur_phase;
197 };
198
199 /** Destructor. */
200 inline
~ut_stage_alter_t()201 ut_stage_alter_t::~ut_stage_alter_t()
202 {
203 if (m_progress == NULL) {
204 return;
205 }
206
207 /* Set completed = estimated before we quit. */
208 mysql_stage_set_work_completed(
209 m_progress,
210 mysql_stage_get_work_estimated(m_progress));
211
212 mysql_end_stage();
213 }
214
215 /** Flag an ALTER TABLE start (read primary key phase).
216 @param[in] n_sort_indexes number of indexes that will be sorted
217 during ALTER TABLE, used for estimating the total work to be done */
218 inline
219 void
begin_phase_read_pk(ulint n_sort_indexes)220 ut_stage_alter_t::begin_phase_read_pk(
221 ulint n_sort_indexes)
222 {
223 m_n_sort_indexes = n_sort_indexes;
224
225 m_cur_phase = READ_PK;
226
227 m_progress = mysql_set_stage(
228 srv_stage_alter_table_read_pk_internal_sort.m_key);
229
230 mysql_stage_set_work_completed(m_progress, 0);
231 reestimate();
232 }
233
234 /** Increment the number of records in PK (table) with 1.
235 This is used to get more accurate estimate about the number of
236 records per page which is needed because some phases work on
237 per-page basis while some work on per-record basis and we want
238 to get the progress as even as possible. */
239 inline
240 void
n_pk_recs_inc()241 ut_stage_alter_t::n_pk_recs_inc()
242 {
243 m_n_pk_recs++;
244 }
245
246 /** Flag either one record or one page processed, depending on the
247 current phase. */
248 inline
249 void
inc(ulint inc_val)250 ut_stage_alter_t::inc(ulint inc_val)
251 {
252 if (m_progress == NULL) {
253 return;
254 }
255
256 ulint multi_factor = 1;
257 bool should_proceed = true;
258
259 switch (m_cur_phase) {
260 case NOT_STARTED:
261 ut_error;
262 case READ_PK:
263 m_n_pk_pages++;
264 ut_ad(inc_val == 1);
265 /* Overall the read pk phase will read all the pages from the
266 PK and will do work, proportional to the number of added
267 indexes, thus when this is called once per read page we
268 increment with 1 + m_n_sort_indexes */
269 inc_val = 1 + m_n_sort_indexes;
270 break;
271 case SORT:
272 multi_factor = m_sort_multi_factor;
273 /* fall through */
274 case INSERT: {
275 /* Increment the progress every nth record. During
276 sort and insert phases, this method is called once per
277 record processed. We need fractional point numbers here
278 because "records per page" is such a number naturally and
279 to avoid rounding skew we want, for example: if there are
280 (double) N records per page, then the work_completed
281 should be incremented on the inc() calls round(k*N),
282 for k=1,2,3... */
283 const double every_nth = m_n_recs_per_page *
284 static_cast<double>(multi_factor);
285
286 const ulint k = static_cast<ulint>(
287 round(static_cast<double>(m_n_recs_processed) /
288 every_nth));
289
290 const ulint nth = static_cast<ulint>(
291 round(static_cast<double>(k) * every_nth));
292
293 should_proceed = m_n_recs_processed == nth;
294
295 m_n_recs_processed++;
296
297 break;
298 }
299 /* JAN: TODO: MySQL 5.7
300 case LOG_INDEX:
301 break;
302 case LOG_TABLE:
303 break; */
304 case LOG_INNODB_INDEX:
305 case LOG_INNODB_TABLE:
306 break;
307 case END:
308 break;
309 }
310
311 if (should_proceed) {
312 mysql_stage_inc_work_completed(m_progress, inc_val);
313 reestimate();
314 }
315 }
316
317 /** Flag the end of reading of the primary key.
318 Here we know the exact number of pages and records and calculate
319 the number of records per page and refresh the estimate. */
320 inline
321 void
end_phase_read_pk()322 ut_stage_alter_t::end_phase_read_pk()
323 {
324 reestimate();
325
326 if (m_n_pk_pages == 0) {
327 /* The number of pages in the PK could be 0 if the tree is
328 empty. In this case we set m_n_recs_per_page to 1 to avoid
329 division by zero later. */
330 m_n_recs_per_page = 1.0;
331 } else {
332 m_n_recs_per_page = std::max(
333 static_cast<double>(m_n_pk_recs)
334 / static_cast<double>(m_n_pk_pages),
335 1.0);
336 }
337 }
338
339 /** Flag the beginning of the sort phase.
340 @param[in] sort_multi_factor since merge sort processes
341 one page more than once we only update the estimate once per this
342 many pages processed. */
343 inline
344 void
begin_phase_sort(double sort_multi_factor)345 ut_stage_alter_t::begin_phase_sort(
346 double sort_multi_factor)
347 {
348 if (sort_multi_factor <= 1.0) {
349 m_sort_multi_factor = 1;
350 } else {
351 m_sort_multi_factor = static_cast<ulint>(
352 round(sort_multi_factor));
353 }
354
355 change_phase(&srv_stage_alter_table_merge_sort);
356 }
357
358 /** Flag the beginning of the insert phase. */
359 inline
360 void
begin_phase_insert()361 ut_stage_alter_t::begin_phase_insert()
362 {
363 change_phase(&srv_stage_alter_table_insert);
364 }
365
366 /** Flag the beginning of the log index phase. */
367 inline
368 void
begin_phase_log_index()369 ut_stage_alter_t::begin_phase_log_index()
370 {
371 change_phase(&srv_stage_alter_table_log_index);
372 }
373
374 /** Flag the beginning of the log table phase. */
375 inline
376 void
begin_phase_log_table()377 ut_stage_alter_t::begin_phase_log_table()
378 {
379 change_phase(&srv_stage_alter_table_log_table);
380 }
381
382 /** Flag the beginning of the end phase. */
383 inline
384 void
begin_phase_end()385 ut_stage_alter_t::begin_phase_end()
386 {
387 change_phase(&srv_stage_alter_table_end);
388 }
389
390 /** Update the estimate of total work to be done. */
391 inline
392 void
reestimate()393 ut_stage_alter_t::reestimate()
394 {
395 if (m_progress == NULL) {
396 return;
397 }
398
399 /* During the log table phase we calculate the estimate as
400 work done so far + log size remaining. */
401 if (m_cur_phase == LOG_INNODB_TABLE) {
402 mysql_stage_set_work_estimated(
403 m_progress,
404 mysql_stage_get_work_completed(m_progress)
405 + row_log_estimate_work(m_pk));
406 return;
407 }
408
409 /* During the other phases we use a formula, regardless of
410 how much work has been done so far. */
411
412 /* For number of pages in the PK - if the PK has not been
413 read yet, use stat_n_leaf_pages (approximate), otherwise
414 use the exact number we gathered. */
415 const ulint n_pk_pages
416 = m_cur_phase != READ_PK
417 ? m_n_pk_pages
418 : m_pk->stat_n_leaf_pages;
419
420 ulonglong estimate __attribute__((unused))
421 = n_pk_pages
422 * (1 /* read PK */
423 + m_n_sort_indexes /* row_merge_buf_sort() inside the
424 read PK per created index */
425 + m_n_sort_indexes * 2 /* sort & insert per created index */)
426 + row_log_estimate_work(m_pk);
427
428 /* Prevent estimate < completed */
429 estimate = std::max(estimate,
430 mysql_stage_get_work_completed(m_progress));
431
432 mysql_stage_set_work_estimated(m_progress, estimate);
433 }
434
435 /** Change the current phase.
436 @param[in] new_stage pointer to the new stage to change to */
437 inline
438 void
change_phase(const PSI_stage_info * new_stage)439 ut_stage_alter_t::change_phase(
440 const PSI_stage_info* new_stage)
441 {
442 if (m_progress == NULL) {
443 return;
444 }
445
446 if (new_stage == &srv_stage_alter_table_read_pk_internal_sort) {
447 m_cur_phase = READ_PK;
448 } else if (new_stage == &srv_stage_alter_table_merge_sort) {
449 m_cur_phase = SORT;
450 } else if (new_stage == &srv_stage_alter_table_insert) {
451 m_cur_phase = INSERT;
452 /* JAN: TODO: MySQL 5.7 used LOG_INDEX and LOG_TABLE */
453 } else if (new_stage == &srv_stage_alter_table_log_index) {
454 m_cur_phase = LOG_INNODB_INDEX;
455 } else if (new_stage == &srv_stage_alter_table_log_table) {
456 m_cur_phase = LOG_INNODB_TABLE;
457 } else if (new_stage == &srv_stage_alter_table_end) {
458 m_cur_phase = END;
459 } else {
460 ut_error;
461 }
462
463 const ulonglong c = mysql_stage_get_work_completed(m_progress);
464 const ulonglong e = mysql_stage_get_work_estimated(m_progress);
465
466 m_progress = mysql_set_stage(new_stage->m_key);
467
468 mysql_stage_set_work_completed(m_progress, c);
469 mysql_stage_set_work_estimated(m_progress, e);
470 }
471 #else /* HAVE_PSI_STAGE_INTERFACE */
472
473 class ut_stage_alter_t {
474 public:
ut_stage_alter_t(const dict_index_t *)475 explicit ut_stage_alter_t(const dict_index_t*) {}
476
begin_phase_read_pk(ulint)477 void begin_phase_read_pk(ulint) {}
478
n_pk_recs_inc()479 void n_pk_recs_inc() {}
480
inc()481 void inc() {}
inc(ulint)482 void inc(ulint) {}
483
end_phase_read_pk()484 void end_phase_read_pk() {}
485
begin_phase_sort(double)486 void begin_phase_sort(double) {}
487
begin_phase_insert()488 void begin_phase_insert() {}
489
begin_phase_log_index()490 void begin_phase_log_index() {}
491
begin_phase_log_table()492 void begin_phase_log_table() {}
493
begin_phase_end()494 void begin_phase_end() {}
495 };
496
497 #endif /* HAVE_PSI_STAGE_INTERFACE */
498
499 #endif /* ut0stage_h */
500