1 /*****************************************************************************
2
3 Copyright (c) 2016, 2020, Oracle and/or its affiliates. All Rights Reserved.
4
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24
25 *****************************************************************************/
26
27 /** @file handler/p_s.cc
28 InnoDB performance_schema tables interface to MySQL.
29
30 *******************************************************/
31
32 #include "storage/innobase/handler/p_s.h"
33
34 #include <stdlib.h>
35 #include <sys/types.h>
36
37 #include "lock0iter.h"
38 #include "lock0lock.h"
39 #include "sql_table.h"
40 #include "table.h"
41 #include "trx0i_s.h"
42 #include "trx0sys.h"
43
44 #include "my_io.h"
45
46 /**
47 @page PAGE_INNODB_PFS Innodb data lock instrumentation
48 Innodb Performance Schema data lock instrumentation
49
50 @section data_lock_iterators Data lock iterators
51
52 To provide content to the performance_schema.data_locks table,
53 innodb implements #Innodb_data_lock_iterator.
54
Likewise, table performance_schema.data_lock_waits
is populated with #Innodb_data_lock_wait_iterator.
57
Both these iterators need to return the data present
in the innodb engine memory,
which implies taking the proper mutex locks when inspecting it.
61 The structure to inspect here is the transaction list (#trx_sys)
62
How to implement this scan is critical for performance.
64
65 @subsection no_full_scan No full scan
66
67 Consider this implementation:
68 - Take all necessary locks
69 - Scan all the innodb internal locks
70 - Report all of them to the performance schema
71 - Release all the locks taken
72
73 This implementation materializes the entire table.
74
75 The benefits with this approach are:
76 - The materialized table is consistent
77
78 The problems with this approach are:
79 - The innodb engine is frozen for the entire duration,
80 for a time that is unpredictable.
81 - Memory consumption spikes, without bounds
82 - Materializing all rows upfront is incompatible with supporting an index
83
84 For example with N = 10,000 transactions,
85 a single scan reports all 10,000 transaction locks.
86
87 This alternative is rejected.
88
89 @subsection no_single_row_scan No single row scan
90
91 Consider this implementation:
92 - Take all necessary locks
93 - Resume the scan on innodb internal locks for 1 record
94 - Report this record to the performance schema
95 - Release all the locks taken
96
97 This implementation returns a row for a single transaction,
98 or even a single lock, at a time.
99
100 The benefits with this approach are:
101 - Memory consumption is well bounded, and low.
102
103 The problems with this approach are:
104 - Data reported can be very inconsistent.
105 - Implementing a restartable scan, on a very dynamic structure,
106 without holding any lock, is complex.
107 - Even assuming how to implement a scan is resolved,
108 looping N times to find element i, i+1, i+2 ... in a list
109 ends up having a complexity in O(N^2), consuming CPU.
110
111 For example with N = 10,000 transactions,
112 the trx_list would be scanned 10,000 times
113 to return 1 record each time.
114 The total number of operations on the list is 100 Millions.
115
116 This alternative is rejected.
117
118 @subsection restartable_batch_scan Restartable batch scan
119
120 What is implemented is:
121 - Take all necessary locks
122 - Resume the scan on innodb internal locks,
123 for a given record range
124 - Report all the records in the range to the performance schema
125 - Release all the locks taken
126
127 This is a compromise, with the following properties:
128 - Memory consumption is bounded,
129 by the number of records returned in each range.
130 - The duration of mutex locks on innodb structures is bounded
131 by the number of records in each range
132 - The data returned is not consistent,
133 but at least it is "consistent by chunks"
- The overall scan complexity is O(N^2 / RANGE), where RANGE is the range
  size: the list of N transactions is walked once per batch, and there are
  N/RANGE batches. This is still quadratic in N, but in practice should be
  reasonable.
136
For example with N = 10,000 transactions and RANGE = 256,
there are 40 batches over the trx list,
where each batch reports (up to) 256 trx, with the trx locks.
The total number of operations on the list is 400 thousand.
141 */
142
143 static const char *g_engine = "INNODB";
144 static const size_t g_engine_length = 6;
145
get_next_trx(const trx_t * trx,bool read_write)146 inline trx_t *get_next_trx(const trx_t *trx, bool read_write) {
147 if (read_write) {
148 return (UT_LIST_GET_NEXT(trx_list, trx));
149 } else {
150 return (UT_LIST_GET_NEXT(mysql_trx_list, trx));
151 }
152 }
153
154 /** Pass of a given scan. */
155 enum scan_pass {
156 INIT_SCANNING,
157 /** Scan the RW trx list.
158 @sa trx_sys_t::rw_trx_list
159 */
160 SCANNING_RW_TRX_LIST,
161 /** Scan the MySQL trx list.
162 @sa trx_t::mysql_trx_list
163 */
164 SCANNING_MYSQL_TRX_LIST,
165 DONE_SCANNING
166 };
167
168 /** State of a given scan.
169 Scans are restartable, and done in multiple calls.
170 Overall, the code scans separately:
171 - the RW trx list
172 - the MySQL trx list
173 For each list, the scan is done by ranges of trx_id values.
174 Saving the current scan state allows to resume where the previous
175 scan ended.
176 */
177 class Innodb_trx_scan_state {
178 public:
179 const trx_id_t SCAN_RANGE = 256;
180
Innodb_trx_scan_state()181 Innodb_trx_scan_state()
182 : m_scan_pass(INIT_SCANNING),
183 m_start_trx_id_range(0),
184 m_end_trx_id_range(SCAN_RANGE),
185 m_next_trx_id_range(TRX_ID_MAX) {}
186
~Innodb_trx_scan_state()187 ~Innodb_trx_scan_state() {}
188
get_pass()189 scan_pass get_pass() { return m_scan_pass; }
190
191 /** Prepare the next scan.
192 When there are TRX after the current range,
193 compute the next range.
194 When there are no more TRX for this pass,
195 advance to the next pass.
196 */
prepare_next_scan()197 void prepare_next_scan() {
198 if (m_next_trx_id_range != TRX_ID_MAX) {
199 m_start_trx_id_range =
200 m_next_trx_id_range - (m_next_trx_id_range % SCAN_RANGE);
201 m_end_trx_id_range = m_start_trx_id_range + SCAN_RANGE;
202 m_next_trx_id_range = TRX_ID_MAX;
203 } else {
204 switch (m_scan_pass) {
205 case INIT_SCANNING:
206 m_scan_pass = SCANNING_RW_TRX_LIST;
207 m_start_trx_id_range = 0;
208 m_end_trx_id_range = SCAN_RANGE;
209 m_next_trx_id_range = TRX_ID_MAX;
210 break;
211 case SCANNING_RW_TRX_LIST:
212 m_scan_pass = SCANNING_MYSQL_TRX_LIST;
213 m_start_trx_id_range = 0;
214 m_end_trx_id_range = SCAN_RANGE;
215 m_next_trx_id_range = TRX_ID_MAX;
216 break;
217 case SCANNING_MYSQL_TRX_LIST:
218 m_scan_pass = DONE_SCANNING;
219 break;
220 case DONE_SCANNING:
221 default:
222 ut_error;
223 break;
224 }
225 }
226 }
227
228 /** Check if a transaction belongs to the current range.
229 As a side effect, compute the next range.
230 @param[in] trx_id Transaction id to evaluate
231 @return True if transaction is within range.
232 */
trx_id_in_range(trx_id_t trx_id)233 bool trx_id_in_range(trx_id_t trx_id) {
234 ut_ad(trx_id < TRX_ID_MAX);
235
236 if ((m_start_trx_id_range <= trx_id) && (trx_id < m_end_trx_id_range)) {
237 return true;
238 }
239
240 if ((m_end_trx_id_range <= trx_id) && (trx_id < m_next_trx_id_range)) {
241 m_next_trx_id_range = trx_id;
242 }
243
244 return false;
245 }
246
247 private:
248 /** Current scan pass. */
249 scan_pass m_scan_pass;
250 /** Start of the current range. */
251 trx_id_t m_start_trx_id_range;
252 /** End of the current range. */
253 trx_id_t m_end_trx_id_range;
254 /** Next range. */
255 trx_id_t m_next_trx_id_range;
256 };
257
258 /** Inspect data locks for the innodb storage engine. */
259 class Innodb_data_lock_iterator : public PSI_engine_data_lock_iterator {
260 public:
261 Innodb_data_lock_iterator();
262 ~Innodb_data_lock_iterator();
263
264 virtual bool scan(PSI_server_data_lock_container *container,
265 bool with_lock_data);
266
267 virtual bool fetch(PSI_server_data_lock_container *container,
268 const char *engine_lock_id, size_t engine_lock_id_length,
269 bool with_lock_data);
270
271 private:
272 /** Scan a trx list.
273 @param[in] container The container to fill
274 @param[in] with_lock_data True if column LOCK_DATA
275 needs to be populated.
276 @param[in] read_write True if the trx list is the RW list
277 @param[in] trx_list The trx list to scan
278 @returns The number of records found
279 */
280 size_t scan_trx_list(PSI_server_data_lock_container *container,
281 bool with_lock_data, bool read_write,
282 trx_ut_list_t *trx_list);
283
284 /** Scan a given trx.
285 Either scan all the locks for a transaction,
286 or scan only records matching a given lock.
287 @param[in] container The container to fill
288 @param[in] with_lock_data True if column LOCK_DATA
289 needs to be populated.
290 @param[in] trx The trx to scan
291 @param[in] with_filter True if looking for a specific record
292 only.
293 @param[in] filter_lock_immutable_id Immutable id of lock_t we are looking
294 for
295 @param[in] filter_heap_id Heap id to look for, when filtering
296 @returns The number of records found
297 */
298 size_t scan_trx(PSI_server_data_lock_container *container,
299 bool with_lock_data, const trx_t *trx, bool with_filter,
300 uint64_t filter_lock_immutable_id = 0,
301 ulint filter_heap_id = 0);
302
303 /** Current scan state. */
304 Innodb_trx_scan_state m_scan_state;
305 };
306
307 /** Inspect data lock waits for the innodb storage engine. */
308 class Innodb_data_lock_wait_iterator
309 : public PSI_engine_data_lock_wait_iterator {
310 public:
311 Innodb_data_lock_wait_iterator();
312 ~Innodb_data_lock_wait_iterator();
313
314 virtual bool scan(PSI_server_data_lock_wait_container *container);
315
316 virtual bool fetch(PSI_server_data_lock_wait_container *container,
317 const char *requesting_engine_lock_id,
318 size_t requesting_engine_lock_id_length,
319 const char *blocking_engine_lock_id,
320 size_t blocking_engine_lock_id_length);
321
322 private:
323 /** Scan a given transaction list.
324 @param[in] container The container to fill
325 @param[in] read_write True if the transaction list is the RW list.
326 @param[in] trx_list The trx list to scan
327 @returns the number of records found.
328 */
329 size_t scan_trx_list(PSI_server_data_lock_wait_container *container,
330 bool read_write, trx_ut_list_t *trx_list);
331
332 /** Scan a given transaction.
333 Either scan all the waits for a transaction,
334 or scan only records matching a given wait.
335 @param[in] container The container to fill
336 @param[in] trx The trx to scan
337 @param[in] with_filter True if looking for a given wait only.
338 @param[in] filter_requesting_lock_immutable_id Immutable id of lock_t for
339 the requesting lock, when filtering
340 @param[in] filter_blocking_lock_immutable_id Immutable id of lock_t
341 for the blocking lock, when filtering
342 @returns the number of records found.
343 */
344 size_t scan_trx(PSI_server_data_lock_wait_container *container,
345 const trx_t *trx, bool with_filter,
346 uint64_t filter_requesting_lock_immutable_id = 0,
347 uint64_t filter_blocking_lock_immutable_id = 0);
348
349 /** Current scan state. */
350 Innodb_trx_scan_state m_scan_state;
351 };
352
353 /** Check if a transaction should be discarded.
354 Transactions present in any TRX LIST that have not started yet
355 are discarded, when inspecting data locks.
356 Transactions present in the MySQL TRX LIST,
357 that are writing data and have an id, are also discarded.
358 @param[in] trx Transaction to evaluate
359 @param[in] read_write True if trx is in the RW TRX list
360 @returns True if the trx should be discarded
361 */
discard_trx(const trx_t * trx,bool read_write)362 bool discard_trx(const trx_t *trx, bool read_write) {
363 if (!trx_is_started(trx)) {
364 return true;
365 }
366
367 if ((!read_write && trx->id != 0 && !trx->read_only)) {
368 return true;
369 }
370
371 return false;
372 }
373
374 /** Find a transaction in a TRX LIST.
375 @param[in] filter_trx_immutable_id The transaction immutable id
376 @param[in] read_write True for the RW TRX LIST
377 @param[in] trx_list The transaction list
378 @returns The transaction when found, or NULL
379 */
fetch_trx_in_trx_list(uint64_t filter_trx_immutable_id,bool read_write,trx_ut_list_t * trx_list)380 static const trx_t *fetch_trx_in_trx_list(uint64_t filter_trx_immutable_id,
381 bool read_write,
382 trx_ut_list_t *trx_list) {
383 const trx_t *trx;
384
385 /* It is not obvious if and why we need lock_sys exclusive access, but we do
386 own exclusive latch here, so treat this assert more as a documentation */
387 ut_ad(locksys::owns_exclusive_global_latch());
388 ut_ad(trx_sys_mutex_own());
389
390 for (trx = UT_LIST_GET_FIRST(*trx_list); trx != nullptr;
391 trx = get_next_trx(trx, read_write)) {
392 if (discard_trx(trx, read_write)) {
393 continue;
394 }
395
396 if (filter_trx_immutable_id == trx_immutable_id(trx)) {
397 return trx;
398 }
399 }
400
401 return nullptr;
402 }
403
Innodb_data_lock_inspector()404 Innodb_data_lock_inspector::Innodb_data_lock_inspector() {}
405
~Innodb_data_lock_inspector()406 Innodb_data_lock_inspector::~Innodb_data_lock_inspector() {}
407
408 PSI_engine_data_lock_iterator *
create_data_lock_iterator()409 Innodb_data_lock_inspector::create_data_lock_iterator() {
410 return new Innodb_data_lock_iterator();
411 }
412
413 PSI_engine_data_lock_wait_iterator *
create_data_lock_wait_iterator()414 Innodb_data_lock_inspector::create_data_lock_wait_iterator() {
415 return new Innodb_data_lock_wait_iterator();
416 }
417
destroy_data_lock_iterator(PSI_engine_data_lock_iterator * it)418 void Innodb_data_lock_inspector::destroy_data_lock_iterator(
419 PSI_engine_data_lock_iterator *it) {
420 delete it;
421 }
422
destroy_data_lock_wait_iterator(PSI_engine_data_lock_wait_iterator * it)423 void Innodb_data_lock_inspector::destroy_data_lock_wait_iterator(
424 PSI_engine_data_lock_wait_iterator *it) {
425 delete it;
426 }
427
428 /** Allocate identifier in performance schema container.
429 @param[in] container The container to fill
430 @param[in] id_str The identifier string
431 @param[out] id_length The identifier string length
432 @returns string allocated in the performance schema container.
433 */
alloc_identifier(PSI_server_data_lock_container * container,std::string & id_str,size_t * id_length)434 const char *alloc_identifier(PSI_server_data_lock_container *container,
435 std::string &id_str, size_t *id_length) {
436 *id_length = id_str.length();
437 const char *id_name = nullptr;
438
439 if (*id_length > 0) {
440 id_name = container->cache_data(id_str.c_str(), *id_length);
441 }
442
443 return (id_name);
444 }
445
446 /** Parse a table path string.
447 Isolate the table schema, name, partition and sub partition
448 from a table path string.
449 Convert these strings and store them in the performance schema container.
450 @note String returned are not zero terminated.
451 @param[in] container The container to fill
452 @param[in] table_path The table path string
453 @param[in] table_path_length The table path string length
454 @param[out] table_schema The table schema
455 @param[out] table_schema_length The table schema length
456 @param[out] table_name The table name
457 @param[out] table_name_length The table name length
458 @param[out] partition_name Partition name
459 @param[out] partition_name_length Partition name length
460 @param[out] subpartition_name Sub partition name
461 @param[out] subpartition_name_length Sub partition name length
462 */
parse_table_path(PSI_server_data_lock_container * container,const char * table_path,size_t table_path_length,const char ** table_schema,size_t * table_schema_length,const char ** table_name,size_t * table_name_length,const char ** partition_name,size_t * partition_name_length,const char ** subpartition_name,size_t * subpartition_name_length)463 void parse_table_path(PSI_server_data_lock_container *container,
464 const char *table_path, size_t table_path_length,
465 const char **table_schema, size_t *table_schema_length,
466 const char **table_name, size_t *table_name_length,
467 const char **partition_name,
468 size_t *partition_name_length,
469 const char **subpartition_name,
470 size_t *subpartition_name_length) {
471 std::string dict_table(table_path);
472
473 /* Get schema and table name in system cs. */
474 std::string schema;
475 std::string table;
476 std::string partition;
477 bool is_tmp;
478 dict_name::get_table(dict_table, true, schema, table, partition, is_tmp);
479
480 std::string part;
481 std::string sub_part;
482 if (!partition.empty()) {
483 ut_ad(dict_name::is_partition(dict_table));
484 /* Get schema partition and sub-partition name in system cs. */
485 dict_name::get_partition(partition, true, part, sub_part);
486 }
487
488 *table_schema = alloc_identifier(container, schema, table_schema_length);
489 *table_name = alloc_identifier(container, table, table_name_length);
490 *partition_name = alloc_identifier(container, part, partition_name_length);
491 *subpartition_name =
492 alloc_identifier(container, sub_part, subpartition_name_length);
493 }
494
495 /** Print a table lock id.
496 @param[in] lock The lock to print
497 @param[in,out] lock_id Printing buffer
498 @param[in] lock_id_size Printing buffer length
499 */
print_table_lock_id(const lock_t * lock,char * lock_id,size_t lock_id_size)500 void print_table_lock_id(const lock_t *lock, char *lock_id,
501 size_t lock_id_size) {
502 /* We try to be backward compatible with INFORMATION_SCHEMA so that one can
503 join with INFORMATION_SCHEMA.innodb_trx.trx_requested_lock_id column */
504 i_s_locks_row_t row;
505 fill_locks_row(&row, lock, ULINT_UNDEFINED);
506 trx_i_s_create_lock_id(&row, lock_id, lock_id_size);
507 }
508
509 /** Print a record lock id.
510 @param[in] lock The lock to print
511 @param[in] heap_no Lock heap number
512 @param[in,out] lock_id Printing buffer
513 @param[in] lock_id_size Printing buffer length
514 */
print_record_lock_id(const lock_t * lock,ulint heap_no,char * lock_id,size_t lock_id_size)515 void print_record_lock_id(const lock_t *lock, ulint heap_no, char *lock_id,
516 size_t lock_id_size) {
517 /* We try to be backward compatible with INFORMATION_SCHEMA so that one can
518 join with INFORMATION_SCHEMA.innodb_trx.trx_requested_lock_id column */
519 i_s_locks_row_t row;
520 fill_locks_row(&row, lock, heap_no);
521 trx_i_s_create_lock_id(&row, lock_id, lock_id_size);
522 }
523
524 /** Print a lock id.
525 @param[in] lock The lock to print
526 @param[in] heap_no Lock heap number
527 @param[in,out] lock_id Printing buffer
528 @param[in] lock_id_size Printing buffer length
529 */
print_lock_id(const lock_t * lock,ulint heap_no,char * lock_id,size_t lock_id_size)530 void print_lock_id(const lock_t *lock, ulint heap_no, char *lock_id,
531 size_t lock_id_size) {
532 switch (lock_get_type(lock)) {
533 case LOCK_TABLE:
534 print_table_lock_id(lock, lock_id, lock_id_size);
535 break;
536 case LOCK_REC:
537 print_record_lock_id(lock, heap_no, lock_id, lock_id_size);
538 break;
539 default:
540 ut_error;
541 }
542 }
543
544 /** Scan a lock id string and extract information necessary to find a row
545 by primary key.
546 @param[in] lock_id The lock id string to parse
547 @param[out] trx_immutable_id The immutable id of lock->trx
548 @param[out] lock_immutable_id The immutable id of lock
549 @param[out] heap_id The heap number found, for record locks
550 @returns The type of lock found.
551 @retval LOCK_TABLE Table lock
552 @retval LOCK_REC Record lock
553 @retval 0 Format error
554 */
scan_lock_id(const char * lock_id,uint64_t * trx_immutable_id,uint64_t * lock_immutable_id,ulint * heap_id)555 int scan_lock_id(const char *lock_id, uint64_t *trx_immutable_id,
556 uint64_t *lock_immutable_id, ulint *heap_id) {
557 i_s_locks_row_t row;
558 int lock_type = trx_i_s_parse_lock_id(lock_id, &row);
559 if (!lock_type) {
560 return 0;
561 }
562 *trx_immutable_id = row.lock_trx_immutable_id;
563 *lock_immutable_id = row.lock_immutable_id;
564 if (lock_type == LOCK_REC) {
565 *heap_id = row.lock_rec;
566 }
567 return lock_type;
568 }
569
Innodb_data_lock_iterator()570 Innodb_data_lock_iterator::Innodb_data_lock_iterator() {}
571
~Innodb_data_lock_iterator()572 Innodb_data_lock_iterator::~Innodb_data_lock_iterator() {}
573
scan(PSI_server_data_lock_container * container,bool with_lock_data)574 bool Innodb_data_lock_iterator::scan(PSI_server_data_lock_container *container,
575 bool with_lock_data) {
576 if (m_scan_state.get_pass() == INIT_SCANNING) {
577 if (!container->accept_engine(g_engine, g_engine_length)) {
578 return true;
579 }
580
581 m_scan_state.prepare_next_scan();
582 }
583
584 if (m_scan_state.get_pass() == DONE_SCANNING) {
585 return true;
586 }
587
588 /* We want locks reported in a single scan to be a consistent snapshot. */
589 locksys::Global_exclusive_latch_guard guard{};
590
591 trx_sys_mutex_enter();
592
593 size_t found = 0;
594
595 while ((m_scan_state.get_pass() == SCANNING_RW_TRX_LIST) && (found == 0)) {
596 found =
597 scan_trx_list(container, with_lock_data, true, &trx_sys->rw_trx_list);
598 m_scan_state.prepare_next_scan();
599 }
600
601 while ((m_scan_state.get_pass() == SCANNING_MYSQL_TRX_LIST) && (found == 0)) {
602 found = scan_trx_list(container, with_lock_data, false,
603 &trx_sys->mysql_trx_list);
604 m_scan_state.prepare_next_scan();
605 }
606
607 trx_sys_mutex_exit();
608
609 return false;
610 }
611
fetch(PSI_server_data_lock_container * container,const char * engine_lock_id,size_t engine_lock_id_length,bool with_lock_data)612 bool Innodb_data_lock_iterator::fetch(PSI_server_data_lock_container *container,
613 const char *engine_lock_id,
614 size_t engine_lock_id_length,
615 bool with_lock_data) {
616 int record_type;
617 uint64_t trx_immutable_id;
618 ulint heap_id;
619 uint64_t lock_immutable_id;
620 const trx_t *trx;
621
622 if (!container->accept_engine(g_engine, g_engine_length)) {
623 return true;
624 }
625
626 record_type = scan_lock_id(engine_lock_id, &trx_immutable_id,
627 &lock_immutable_id, &heap_id);
628
629 if (record_type == 0) {
630 return true;
631 }
632
633 /* scan_trx() requires exclusive global latch to iterate over locks of trx */
634 locksys::Global_exclusive_latch_guard guard{};
635
636 trx_sys_mutex_enter();
637
638 trx = fetch_trx_in_trx_list(trx_immutable_id, true, &trx_sys->rw_trx_list);
639
640 if (trx == nullptr) {
641 trx = fetch_trx_in_trx_list(trx_immutable_id, false,
642 &trx_sys->mysql_trx_list);
643 }
644
645 if (trx != nullptr) {
646 scan_trx(container, with_lock_data, trx, true, lock_immutable_id, heap_id);
647 }
648
649 trx_sys_mutex_exit();
650
651 return true;
652 }
653
654 /** Scan a trx list.
655 @param[in] container The container to fill
656 @param[in] with_lock_data True if column LOCK_DATA
657 needs to be populated.
658 @param[in] read_write True if the trx list is the RW list
659 @param[in] trx_list The trx list to scan
660 @returns The number of records found
661 */
scan_trx_list(PSI_server_data_lock_container * container,bool with_lock_data,bool read_write,trx_ut_list_t * trx_list)662 size_t Innodb_data_lock_iterator::scan_trx_list(
663 PSI_server_data_lock_container *container, bool with_lock_data,
664 bool read_write, trx_ut_list_t *trx_list) {
665 const trx_t *trx;
666 trx_id_t trx_id;
667 size_t found = 0;
668
669 /* We are about to scan over various locks of multiple transactions not
670 limited to any particular shard thus we need an exclusive latch on lock_sys */
671 ut_ad(locksys::owns_exclusive_global_latch());
672 ut_ad(trx_sys_mutex_own());
673
674 for (trx = UT_LIST_GET_FIRST(*trx_list); trx != nullptr;
675 trx = get_next_trx(trx, read_write)) {
676 if (discard_trx(trx, read_write)) {
677 continue;
678 }
679
680 trx_id = trx_get_id_for_print(trx);
681
682 if (!m_scan_state.trx_id_in_range(trx_id)) {
683 continue;
684 }
685
686 found += scan_trx(container, with_lock_data, trx, false);
687 }
688
689 return found;
690 }
691
692 /** Scan a given trx.
693 Either scan all the locks for a transaction,
694 or scan only records matching a given lock.
695 @param[in] container The container to fill
696 @param[in] with_lock_data True if column LOCK_DATA
697 needs to be populated.
698 @param[in] trx The trx to scan
699 @param[in] with_filter True if looking for a specific
700 record only.
701 @param[in] filter_lock_immutable_id Immutable id of lock_t we are looking for
702 @param[in] filter_heap_id Heap id to look for, when filtering
703 @returns The number of records found
704 */
scan_trx(PSI_server_data_lock_container * container,bool with_lock_data,const trx_t * trx,bool with_filter,uint64_t filter_lock_immutable_id,ulint filter_heap_id)705 size_t Innodb_data_lock_iterator::scan_trx(
706 PSI_server_data_lock_container *container, bool with_lock_data,
707 const trx_t *trx, bool with_filter, uint64_t filter_lock_immutable_id,
708 ulint filter_heap_id) {
709 assert_trx_nonlocking_or_in_list(trx);
710
711 size_t found = 0;
712 const lock_t *lock;
713 ulonglong trx_id;
714 ulonglong thread_id;
715 ulonglong event_id;
716 const char *table_path;
717 const char *table_schema;
718 size_t table_schema_length;
719 const char *table_name;
720 size_t table_name_length;
721 const char *partition_name;
722 size_t partition_name_length;
723 const char *subpartition_name;
724 size_t subpartition_name_length;
725 const char *index_name;
726 size_t index_name_length;
727 const void *identity;
728 const char *lock_mode_str;
729 const char *lock_type_str;
730 const char *lock_status_str = "GRANTED";
731 const char *lock_data_str;
732 char engine_lock_id[TRX_I_S_LOCK_ID_MAX_LEN + 1];
733 size_t engine_lock_id_length;
734 ulint heap_no;
735 int record_type;
736 lock_t *wait_lock;
737 ut_ad(locksys::owns_exclusive_global_latch());
738 wait_lock = trx->lock.wait_lock;
739
740 trx_id = trx_get_id_for_print(trx);
741
742 if (!container->accept_transaction_id(trx_id)) {
743 return 0;
744 }
745
746 for (lock = lock_get_first_trx_locks(&trx->lock); lock != nullptr;
747 lock = lock_get_next_trx_locks(lock)) {
748 record_type = lock_get_type(lock);
749
750 if (with_filter &&
751 filter_lock_immutable_id != lock_get_immutable_id(lock)) {
752 continue;
753 }
754
755 lock_get_psi_event(lock, &thread_id, &event_id);
756
757 if (!container->accept_thread_id_event_id(thread_id, event_id)) {
758 continue;
759 }
760
761 table_path = lock_get_table_name(lock).m_name;
762 parse_table_path(container, table_path, strlen(table_path), &table_schema,
763 &table_schema_length, &table_name, &table_name_length,
764 &partition_name, &partition_name_length,
765 &subpartition_name, &subpartition_name_length);
766
767 if (!container->accept_object(table_schema, table_schema_length, table_name,
768 table_name_length, partition_name,
769 partition_name_length, subpartition_name,
770 subpartition_name_length)) {
771 continue;
772 }
773
774 identity = lock;
775 lock_mode_str = lock_get_mode_str(lock);
776 lock_type_str = lock_get_type_str(lock);
777
778 if (lock == wait_lock) {
779 lock_status_str = "WAITING";
780 }
781
782 switch (record_type) {
783 case LOCK_TABLE:
784 print_table_lock_id(lock, engine_lock_id, sizeof(engine_lock_id));
785 engine_lock_id_length = strlen(engine_lock_id);
786
787 if (container->accept_lock_id(engine_lock_id, engine_lock_id_length)) {
788 container->add_lock_row(
789 g_engine, g_engine_length, engine_lock_id, engine_lock_id_length,
790 trx_id, thread_id, event_id, table_schema, table_schema_length,
791 table_name, table_name_length, partition_name,
792 partition_name_length, subpartition_name,
793 subpartition_name_length, nullptr, 0, identity, lock_mode_str,
794 lock_type_str, lock_status_str, nullptr);
795 found++;
796 }
797 break;
798 case LOCK_REC:
799 index_name = lock_rec_get_index_name(lock);
800 index_name_length = strlen(index_name);
801
802 heap_no = lock_rec_find_set_bit(lock);
803
804 while (heap_no != ULINT_UNDEFINED) {
805 if (!with_filter || (heap_no == filter_heap_id)) {
806 print_record_lock_id(lock, heap_no, engine_lock_id,
807 sizeof(engine_lock_id));
808 engine_lock_id_length = strlen(engine_lock_id);
809
810 if (container->accept_lock_id(engine_lock_id,
811 engine_lock_id_length)) {
812 if (with_lock_data) {
813 p_s_fill_lock_data(&lock_data_str, lock, heap_no, container);
814 } else {
815 lock_data_str = nullptr;
816 }
817
818 container->add_lock_row(
819 g_engine, g_engine_length, engine_lock_id,
820 engine_lock_id_length, trx_id, thread_id, event_id,
821 table_schema, table_schema_length, table_name,
822 table_name_length, partition_name, partition_name_length,
823 subpartition_name, subpartition_name_length, index_name,
824 index_name_length, identity, lock_mode_str, lock_type_str,
825 lock_status_str, lock_data_str);
826 found++;
827 }
828 }
829
830 heap_no = lock_rec_find_next_set_bit(lock, heap_no);
831 }
832 break;
833 default:
834 ut_error;
835 }
836 }
837
838 return found;
839 }
840
Innodb_data_lock_wait_iterator()841 Innodb_data_lock_wait_iterator::Innodb_data_lock_wait_iterator()
842
843 {}
844
~Innodb_data_lock_wait_iterator()845 Innodb_data_lock_wait_iterator::~Innodb_data_lock_wait_iterator() {}
846
scan(PSI_server_data_lock_wait_container * container)847 bool Innodb_data_lock_wait_iterator::scan(
848 PSI_server_data_lock_wait_container *container) {
849 if (m_scan_state.get_pass() == INIT_SCANNING) {
850 if (!container->accept_engine(g_engine, g_engine_length)) {
851 return true;
852 }
853
854 m_scan_state.prepare_next_scan();
855 }
856
857 if (m_scan_state.get_pass() == DONE_SCANNING) {
858 return true;
859 }
860
861 /* We want locks reported in a single scan to be a consistent snapshot. */
862 locksys::Global_exclusive_latch_guard guard{};
863
864 trx_sys_mutex_enter();
865
866 size_t found = 0;
867
868 while ((m_scan_state.get_pass() == SCANNING_RW_TRX_LIST) && (found == 0)) {
869 found = scan_trx_list(container, true, &trx_sys->rw_trx_list);
870 m_scan_state.prepare_next_scan();
871 }
872
873 while ((m_scan_state.get_pass() == SCANNING_MYSQL_TRX_LIST) && (found == 0)) {
874 found = scan_trx_list(container, false, &trx_sys->mysql_trx_list);
875 m_scan_state.prepare_next_scan();
876 }
877
878 trx_sys_mutex_exit();
879
880 return false;
881 }
882
/** Fetch one specific lock wait, identified by a pair of engine lock ids.
Both ids are parsed back into their (transaction immutable id, lock
immutable id, heap id) components; the requesting transaction is then
looked up in the RW trx list first, falling back to the MySQL trx list,
and scan_trx() is invoked with filtering enabled so that at most the one
matching wait row is added to the container.
@param[in] container The container to fill
@param[in] requesting_engine_lock_id Serialized id of the requesting lock
@param[in] requesting_engine_lock_id_length Length of the requesting id
@param[in] blocking_engine_lock_id Serialized id of the blocking lock
@param[in] blocking_engine_lock_id_length Length of the blocking id
@returns true always: a fetch is complete after a single call, whether or
not the wait still exists. */
bool Innodb_data_lock_wait_iterator::fetch(
    PSI_server_data_lock_wait_container *container,
    const char *requesting_engine_lock_id,
    size_t requesting_engine_lock_id_length,
    const char *blocking_engine_lock_id,
    size_t blocking_engine_lock_id_length) {
  int requesting_record_type;
  uint64_t requesting_trx_immutable_id;
  ulint requesting_heap_id;
  uint64_t requesting_lock_immutable_id;
  int blocking_record_type;
  uint64_t blocking_trx_immutable_id;
  ulint blocking_heap_id;
  uint64_t blocking_lock_immutable_id;
  const trx_t *trx;

  if (!container->accept_engine(g_engine, g_engine_length)) {
    return true;
  }

  /* A record type of 0 means the lock id string did not parse; such ids
  cannot match anything, so report the fetch as complete. */
  requesting_record_type =
      scan_lock_id(requesting_engine_lock_id, &requesting_trx_immutable_id,
                   &requesting_lock_immutable_id, &requesting_heap_id);

  if (requesting_record_type == 0) {
    return true;
  }

  blocking_record_type =
      scan_lock_id(blocking_engine_lock_id, &blocking_trx_immutable_id,
                   &blocking_lock_immutable_id, &blocking_heap_id);

  if (blocking_record_type == 0) {
    return true;
  }

  /* scan_trx() requires exclusive global latch to iterate over locks of trx */
  locksys::Global_exclusive_latch_guard guard{};

  /* Latch order: lock_sys global exclusive latch (above) before the
  trx_sys mutex. */
  trx_sys_mutex_enter();

  trx = fetch_trx_in_trx_list(requesting_trx_immutable_id, true,
                              &trx_sys->rw_trx_list);

  if (trx == nullptr) {
    trx = fetch_trx_in_trx_list(requesting_trx_immutable_id, false,
                                &trx_sys->mysql_trx_list);
  }

  if (trx != nullptr) {
    /* with_filter = true: only the wait matching both immutable lock ids
    is added to the container. */
    scan_trx(container, trx, true, requesting_lock_immutable_id,
             blocking_lock_immutable_id);
  }

  trx_sys_mutex_exit();

  return true;
}
941
/** Scan a given transaction list for lock waits.
Only transactions whose id falls in the current scan-state range are
visited; each visited transaction's waits are reported via scan_trx().
@param[in] container The container to fill
@param[in] read_write True if the transaction list is the RW list.
@param[in] trx_list The trx list to scan
@returns the number of records found.
*/
size_t Innodb_data_lock_wait_iterator::scan_trx_list(
    PSI_server_data_lock_wait_container *container, bool read_write,
    trx_ut_list_t *trx_list) {
  const trx_t *trx;
  trx_id_t trx_id;
  size_t found = 0;

  /* We are about to scan over various locks of multiple transactions not
  limited to any particular shard thus we need an exclusive latch on lock_sys */
  ut_ad(locksys::owns_exclusive_global_latch());
  ut_ad(trx_sys_mutex_own());

  for (trx = UT_LIST_GET_FIRST(*trx_list); trx != nullptr;
       trx = get_next_trx(trx, read_write)) {
    /* Skip transactions that should not be reported for this list
    (e.g. already seen in another list) — see discard_trx(). */
    if (discard_trx(trx, read_write)) {
      continue;
    }

    trx_id = trx_get_id_for_print(trx);

    /* Respect the trx_id window of the current pass so successive scan()
    calls do not re-report the same transactions. */
    if (!m_scan_state.trx_id_in_range(trx_id)) {
      continue;
    }

    /* with_filter = false: report every wait of this transaction. */
    found += scan_trx(container, trx, false);
  }

  return found;
}
977
/** Scan a given transaction.
Either scan all the waits for a transaction,
or scan only records matching a given wait.
A row is produced for each lock in the wait queue that the transaction's
wait lock actually has to wait for, subject to the container's acceptance
callbacks and (optionally) the immutable-id filter.
@param[in] container The container to fill
@param[in] trx The trx to scan
@param[in] with_filter True if looking for a given wait only.
@param[in] filter_requesting_lock_immutable_id Immutable id of
lock_t for the requesting lock, when filtering
@param[in] filter_blocking_lock_immutable_id Immutable id of
lock_t for the blocking lock, when filtering
@returns the number of records found.
*/
size_t Innodb_data_lock_wait_iterator::scan_trx(
    PSI_server_data_lock_wait_container *container, const trx_t *trx,
    bool with_filter, uint64_t filter_requesting_lock_immutable_id,
    uint64_t filter_blocking_lock_immutable_id) {
  assert_trx_nonlocking_or_in_list(trx);

  /* Only transactions currently blocked on a lock contribute wait rows. */
  if (trx->lock.que_state != TRX_QUE_LOCK_WAIT) {
    return 0;
  }

  ulonglong requesting_trx_id;
  ulonglong requesting_thread_id;
  ulonglong requesting_event_id;
  const void *requesting_identity;
  char requesting_engine_lock_id[TRX_I_S_LOCK_ID_MAX_LEN + 1];
  size_t requesting_engine_lock_id_length;
  ulonglong blocking_trx_id;
  ulonglong blocking_thread_id;
  ulonglong blocking_event_id;
  const void *blocking_identity;
  char blocking_engine_lock_id[TRX_I_S_LOCK_ID_MAX_LEN + 1];
  size_t blocking_engine_lock_id_length;
  /* Reading trx->lock.wait_lock and walking the lock queue requires the
  exclusive global lock_sys latch. */
  ut_ad(locksys::owns_exclusive_global_latch());
  lock_t *wait_lock = trx->lock.wait_lock;
  const lock_t *curr_lock;
  int requesting_record_type;
  size_t found = 0;
  lock_queue_iterator_t iter;

  /* que_state == TRX_QUE_LOCK_WAIT implies a wait lock exists. */
  ut_a(wait_lock != nullptr);

  requesting_record_type = lock_get_type(wait_lock);

  if (with_filter &&
      lock_get_immutable_id(wait_lock) != filter_requesting_lock_immutable_id) {
    return 0;
  }

  /* Each accept_*() call lets the container apply server-side pushed-down
  conditions; bail out early when the row cannot match. */
  requesting_trx_id = trx_get_id_for_print(trx);
  if (!container->accept_requesting_transaction_id(requesting_trx_id)) {
    return 0;
  }

  lock_get_psi_event(wait_lock, &requesting_thread_id, &requesting_event_id);
  if (!container->accept_requesting_thread_id_event_id(requesting_thread_id,
                                                       requesting_event_id)) {
    return 0;
  }

  /* For record locks, the heap number identifies the record waited on;
  table locks keep heap_no = 0. */
  ulint heap_no = 0;
  if (requesting_record_type == LOCK_REC) {
    heap_no = lock_rec_find_set_bit(wait_lock);
  }

  print_lock_id(wait_lock, heap_no, requesting_engine_lock_id,
                sizeof(requesting_engine_lock_id));
  requesting_engine_lock_id_length = strlen(requesting_engine_lock_id);
  if (!container->accept_requesting_lock_id(requesting_engine_lock_id,
                                            requesting_engine_lock_id_length)) {
    return 0;
  }

  requesting_identity = wait_lock;
  lock_queue_iterator_reset(&iter, wait_lock, ULINT_UNDEFINED);

  /* Walk backwards through the queue of locks ahead of wait_lock; each one
  that wait_lock conflicts with is a blocking lock. */
  for (curr_lock = lock_queue_iterator_get_prev(&iter); curr_lock != nullptr;
       curr_lock = lock_queue_iterator_get_prev(&iter)) {
    if (with_filter &&
        lock_get_immutable_id(curr_lock) != filter_blocking_lock_immutable_id) {
      continue;
    }

    if (lock_has_to_wait(wait_lock, curr_lock)) {
      blocking_trx_id = lock_get_trx_id(curr_lock);
      if (!container->accept_blocking_transaction_id(blocking_trx_id)) {
        continue;
      }

      lock_get_psi_event(curr_lock, &blocking_thread_id, &blocking_event_id);
      if (!container->accept_blocking_thread_id_event_id(blocking_thread_id,
                                                         blocking_event_id)) {
        continue;
      }

      blocking_identity = curr_lock;
      /* NOTE(review): the blocking lock id is printed with the heap_no of
      the requesting wait_lock — presumably valid because locks in the same
      queue concern the same record; confirm against print_lock_id(). */
      print_lock_id(curr_lock, heap_no, blocking_engine_lock_id,
                    sizeof(blocking_engine_lock_id));
      blocking_engine_lock_id_length = strlen(blocking_engine_lock_id);
      if (!container->accept_blocking_lock_id(blocking_engine_lock_id,
                                              blocking_engine_lock_id_length)) {
        continue;
      }

      container->add_lock_wait_row(
          g_engine, g_engine_length, requesting_engine_lock_id,
          requesting_engine_lock_id_length, requesting_trx_id,
          requesting_thread_id, requesting_event_id, requesting_identity,
          blocking_engine_lock_id, blocking_engine_lock_id_length,
          blocking_trx_id, blocking_thread_id, blocking_event_id,
          blocking_identity);
      found++;
    }
  }

  return found;
}
1096