1 /*****************************************************************************
2 
3 Copyright (c) 2016, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8 
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
24 
25 *****************************************************************************/
26 
27 /** @file handler/p_s.cc
28  InnoDB performance_schema tables interface to MySQL.
29 
30  *******************************************************/
31 
32 #include "storage/innobase/handler/p_s.h"
33 
34 #include <stdlib.h>
35 #include <sys/types.h>
36 
37 #include "lock0iter.h"
38 #include "lock0lock.h"
39 #include "sql_table.h"
40 #include "table.h"
41 #include "trx0i_s.h"
42 #include "trx0sys.h"
43 
44 #include "my_io.h"
45 
46 /**
47   @page PAGE_INNODB_PFS Innodb data lock instrumentation
48   Innodb Performance Schema data lock instrumentation
49 
50   @section data_lock_iterators Data lock iterators
51 
52   To provide content to the performance_schema.data_locks table,
53   innodb implements #Innodb_data_lock_iterator.
54 
55   Likewise, table performance_schema.data_wait_locks
56   is populated with #Innodb_data_lock_wait_iterator.
57 
58   Both these iterators need to return the data present
59   in the innodb engine memory,
  which implies taking the proper mutex locks when inspecting it.
61   The structure to inspect here is the transaction list (#trx_sys)
62 
  How to implement this scan is critical for performance.
64 
65   @subsection no_full_scan No full scan
66 
67   Consider this implementation:
68   - Take all necessary locks
69   - Scan all the innodb internal locks
70   - Report all of them to the performance schema
71   - Release all the locks taken
72 
73   This implementation materializes the entire table.
74 
75   The benefits with this approach are:
76   - The materialized table is consistent
77 
78   The problems with this approach are:
79   - The innodb engine is frozen for the entire duration,
80   for a time that is unpredictable.
81   - Memory consumption spikes, without bounds
82   - Materializing all rows upfront is incompatible with supporting an index
83 
84   For example with N = 10,000 transactions,
85   a single scan reports all 10,000 transaction locks.
86 
87   This alternative is rejected.
88 
89   @subsection no_single_row_scan No single row scan
90 
91   Consider this implementation:
92   - Take all necessary locks
93   - Resume the scan on innodb internal locks for 1 record
94   - Report this record to the performance schema
95   - Release all the locks taken
96 
97   This implementation returns a row for a single transaction,
98   or even a single lock, at a time.
99 
100   The benefits with this approach are:
101   - Memory consumption is well bounded, and low.
102 
103   The problems with this approach are:
104   - Data reported can be very inconsistent.
105   - Implementing a restartable scan, on a very dynamic structure,
106   without holding any lock, is complex.
107   - Even assuming how to implement a scan is resolved,
108   looping N times to find element i, i+1, i+2 ... in a list
109   ends up having a complexity in O(N^2), consuming CPU.
110 
111   For example with N = 10,000 transactions,
112   the trx_list would be scanned 10,000 times
113   to return 1 record each time.
114   The total number of operations on the list is 100 Millions.
115 
116   This alternative is rejected.
117 
118   @subsection restartable_batch_scan Restartable batch scan
119 
120   What is implemented is:
121   - Take all necessary locks
122   - Resume the scan on innodb internal locks,
123   for a given record range
124   - Report all the records in the range to the performance schema
125   - Release all the locks taken
126 
127   This is a compromise, with the following properties:
128   - Memory consumption is bounded,
129     by the number of records returned in each range.
130   - The duration of mutex locks on innodb structures is bounded
131     by the number of records in each range
132   - The data returned is not consistent,
133     but at least it is "consistent by chunks"
  - The overall scan complexity is O(N^2 / RANGE), where RANGE is the range
    size: each of the N/RANGE batches rescans the transaction list of size N.
  This is still technically O(N^2) for a fixed RANGE, but in practice should
  be reasonable.
136 
137   For example with N = 10,000 transactions and RANGE = 256,
138   there are 40 batches at the trx list,
139   where each batch reports (up to) 256 trx, with the trx locks.
  The total number of operations on the list is 400 thousand.
141 */
142 
/** Engine name reported to the performance schema for all InnoDB locks. */
static const char g_engine[] = "INNODB";
/** Length of g_engine, derived from the literal instead of hard-coding 6,
so the two definitions cannot drift apart. */
static const size_t g_engine_length = sizeof(g_engine) - 1;
145 
/** Advance to the next transaction in the list being scanned.
@param[in] trx        Current transaction
@param[in] read_write True when iterating the RW trx list,
                      false when iterating the MySQL trx list
@return the following transaction, or NULL at the end of the list */
inline trx_t *get_next_trx(const trx_t *trx, bool read_write) {
  return read_write ? UT_LIST_GET_NEXT(trx_list, trx)
                    : UT_LIST_GET_NEXT(mysql_trx_list, trx);
}
153 
/** Pass of a given scan.
A scan progresses through these states in order; each pass is itself
performed in multiple restartable batches (see Innodb_trx_scan_state). */
enum scan_pass {
  /** Initial state, before any batch has been scanned. */
  INIT_SCANNING,
  /** Scan the RW trx list.
  @sa trx_sys_t::rw_trx_list
  */
  SCANNING_RW_TRX_LIST,
  /** Scan the MySQL trx list.
  @sa trx_t::mysql_trx_list
  */
  SCANNING_MYSQL_TRX_LIST,
  /** Terminal state, once both lists have been fully scanned. */
  DONE_SCANNING
};
167 
168 /** State of a given scan.
169 Scans are restartable, and done in multiple calls.
170 Overall, the code scans separately:
171 - the RW trx list
172 - the MySQL trx list
173 For each list, the scan is done by ranges of trx_id values.
174 Saving the current scan state allows to resume where the previous
175 scan ended.
176 */
177 class Innodb_trx_scan_state {
178  public:
179   const trx_id_t SCAN_RANGE = 256;
180 
Innodb_trx_scan_state()181   Innodb_trx_scan_state()
182       : m_scan_pass(INIT_SCANNING),
183         m_start_trx_id_range(0),
184         m_end_trx_id_range(SCAN_RANGE),
185         m_next_trx_id_range(TRX_ID_MAX) {}
186 
~Innodb_trx_scan_state()187   ~Innodb_trx_scan_state() {}
188 
get_pass()189   scan_pass get_pass() { return m_scan_pass; }
190 
191   /** Prepare the next scan.
192   When there are TRX after the current range,
193   compute the next range.
194   When there are no more TRX for this pass,
195   advance to the next pass.
196   */
prepare_next_scan()197   void prepare_next_scan() {
198     if (m_next_trx_id_range != TRX_ID_MAX) {
199       m_start_trx_id_range =
200           m_next_trx_id_range - (m_next_trx_id_range % SCAN_RANGE);
201       m_end_trx_id_range = m_start_trx_id_range + SCAN_RANGE;
202       m_next_trx_id_range = TRX_ID_MAX;
203     } else {
204       switch (m_scan_pass) {
205         case INIT_SCANNING:
206           m_scan_pass = SCANNING_RW_TRX_LIST;
207           m_start_trx_id_range = 0;
208           m_end_trx_id_range = SCAN_RANGE;
209           m_next_trx_id_range = TRX_ID_MAX;
210           break;
211         case SCANNING_RW_TRX_LIST:
212           m_scan_pass = SCANNING_MYSQL_TRX_LIST;
213           m_start_trx_id_range = 0;
214           m_end_trx_id_range = SCAN_RANGE;
215           m_next_trx_id_range = TRX_ID_MAX;
216           break;
217         case SCANNING_MYSQL_TRX_LIST:
218           m_scan_pass = DONE_SCANNING;
219           break;
220         case DONE_SCANNING:
221         default:
222           ut_error;
223           break;
224       }
225     }
226   }
227 
228   /** Check if a transaction belongs to the current range.
229   As a side effect, compute the next range.
230   @param[in] trx_id	Transaction id to evaluate
231   @return True if transaction is within range.
232   */
trx_id_in_range(trx_id_t trx_id)233   bool trx_id_in_range(trx_id_t trx_id) {
234     ut_ad(trx_id < TRX_ID_MAX);
235 
236     if ((m_start_trx_id_range <= trx_id) && (trx_id < m_end_trx_id_range)) {
237       return true;
238     }
239 
240     if ((m_end_trx_id_range <= trx_id) && (trx_id < m_next_trx_id_range)) {
241       m_next_trx_id_range = trx_id;
242     }
243 
244     return false;
245   }
246 
247  private:
248   /** Current scan pass. */
249   scan_pass m_scan_pass;
250   /** Start of the current range. */
251   trx_id_t m_start_trx_id_range;
252   /** End of the current range. */
253   trx_id_t m_end_trx_id_range;
254   /** Next range. */
255   trx_id_t m_next_trx_id_range;
256 };
257 
258 /** Inspect data locks for the innodb storage engine. */
259 class Innodb_data_lock_iterator : public PSI_engine_data_lock_iterator {
260  public:
261   Innodb_data_lock_iterator();
262   ~Innodb_data_lock_iterator();
263 
264   virtual bool scan(PSI_server_data_lock_container *container,
265                     bool with_lock_data);
266 
267   virtual bool fetch(PSI_server_data_lock_container *container,
268                      const char *engine_lock_id, size_t engine_lock_id_length,
269                      bool with_lock_data);
270 
271  private:
272   /** Scan a trx list.
273   @param[in] container		The container to fill
274   @param[in] with_lock_data	True if column LOCK_DATA
275   needs to be populated.
276   @param[in] read_write		True if the trx list is the RW list
277   @param[in] trx_list		The trx list to scan
278   @returns The number of records found
279   */
280   size_t scan_trx_list(PSI_server_data_lock_container *container,
281                        bool with_lock_data, bool read_write,
282                        trx_ut_list_t *trx_list);
283 
284   /** Scan a given trx.
285   Either scan all the locks for a transaction,
286   or scan only records matching a given lock.
287   @param[in] container		The container to fill
288   @param[in] with_lock_data	True if column LOCK_DATA
289   needs to be populated.
290   @param[in] trx			The trx to scan
291   @param[in] with_filter		True if looking for a specific record
292   only.
293   @param[in] filter_lock_immutable_id     Immutable id of lock_t we are looking
294   for
295   @param[in] filter_heap_id	Heap id to look for, when filtering
296   @returns The number of records found
297   */
298   size_t scan_trx(PSI_server_data_lock_container *container,
299                   bool with_lock_data, const trx_t *trx, bool with_filter,
300                   uint64_t filter_lock_immutable_id = 0,
301                   ulint filter_heap_id = 0);
302 
303   /** Current scan state. */
304   Innodb_trx_scan_state m_scan_state;
305 };
306 
307 /** Inspect data lock waits for the innodb storage engine. */
308 class Innodb_data_lock_wait_iterator
309     : public PSI_engine_data_lock_wait_iterator {
310  public:
311   Innodb_data_lock_wait_iterator();
312   ~Innodb_data_lock_wait_iterator();
313 
314   virtual bool scan(PSI_server_data_lock_wait_container *container);
315 
316   virtual bool fetch(PSI_server_data_lock_wait_container *container,
317                      const char *requesting_engine_lock_id,
318                      size_t requesting_engine_lock_id_length,
319                      const char *blocking_engine_lock_id,
320                      size_t blocking_engine_lock_id_length);
321 
322  private:
323   /** Scan a given transaction list.
324   @param[in] container		The container to fill
325   @param[in] read_write		True if the transaction list is the RW list.
326   @param[in] trx_list		The trx list to scan
327   @returns the number of records found.
328   */
329   size_t scan_trx_list(PSI_server_data_lock_wait_container *container,
330                        bool read_write, trx_ut_list_t *trx_list);
331 
332   /** Scan a given transaction.
333   Either scan all the waits for a transaction,
334   or scan only records matching a given wait.
335   @param[in] container		          The container to fill
336   @param[in] trx		          The trx to scan
337   @param[in] with_filter		  True if looking for a given wait only.
338   @param[in] filter_requesting_lock_immutable_id  Immutable id of lock_t for
339   the requesting lock, when filtering
340   @param[in] filter_blocking_lock_immutable_id	  Immutable id of lock_t
341   for the blocking lock, when filtering
342   @returns the number of records found.
343   */
344   size_t scan_trx(PSI_server_data_lock_wait_container *container,
345                   const trx_t *trx, bool with_filter,
346                   uint64_t filter_requesting_lock_immutable_id = 0,
347                   uint64_t filter_blocking_lock_immutable_id = 0);
348 
349   /** Current scan state. */
350   Innodb_trx_scan_state m_scan_state;
351 };
352 
353 /** Check if a transaction should be discarded.
354 Transactions present in any TRX LIST that have not started yet
355 are discarded, when inspecting data locks.
356 Transactions present in the MySQL TRX LIST,
357 that are writing data and have an id, are also discarded.
358 @param[in] trx Transaction to evaluate
359 @param[in] read_write True if trx is in the RW TRX list
360 @returns True if the trx should be discarded
361 */
discard_trx(const trx_t * trx,bool read_write)362 bool discard_trx(const trx_t *trx, bool read_write) {
363   if (!trx_is_started(trx)) {
364     return true;
365   }
366 
367   if ((!read_write && trx->id != 0 && !trx->read_only)) {
368     return true;
369   }
370 
371   return false;
372 }
373 
374 /** Find a transaction in a TRX LIST.
375 @param[in] filter_trx_immutable_id  The transaction immutable id
376 @param[in] read_write	            True for the RW TRX LIST
377 @param[in] trx_list	            The transaction list
378 @returns The transaction when found, or NULL
379 */
fetch_trx_in_trx_list(uint64_t filter_trx_immutable_id,bool read_write,trx_ut_list_t * trx_list)380 static const trx_t *fetch_trx_in_trx_list(uint64_t filter_trx_immutable_id,
381                                           bool read_write,
382                                           trx_ut_list_t *trx_list) {
383   const trx_t *trx;
384 
385   /* It is not obvious if and why we need lock_sys exclusive access, but we do
386   own exclusive latch here, so treat this assert more as a documentation */
387   ut_ad(locksys::owns_exclusive_global_latch());
388   ut_ad(trx_sys_mutex_own());
389 
390   for (trx = UT_LIST_GET_FIRST(*trx_list); trx != nullptr;
391        trx = get_next_trx(trx, read_write)) {
392     if (discard_trx(trx, read_write)) {
393       continue;
394     }
395 
396     if (filter_trx_immutable_id == trx_immutable_id(trx)) {
397       return trx;
398     }
399   }
400 
401   return nullptr;
402 }
403 
Innodb_data_lock_inspector()404 Innodb_data_lock_inspector::Innodb_data_lock_inspector() {}
405 
~Innodb_data_lock_inspector()406 Innodb_data_lock_inspector::~Innodb_data_lock_inspector() {}
407 
/** Create a data lock iterator.
Ownership of the iterator is transferred to the caller, which must
release it with destroy_data_lock_iterator(). */
PSI_engine_data_lock_iterator *
Innodb_data_lock_inspector::create_data_lock_iterator() {
  return new Innodb_data_lock_iterator();
}
412 
/** Create a data lock wait iterator.
Ownership of the iterator is transferred to the caller, which must
release it with destroy_data_lock_wait_iterator(). */
PSI_engine_data_lock_wait_iterator *
Innodb_data_lock_inspector::create_data_lock_wait_iterator() {
  return new Innodb_data_lock_wait_iterator();
}
417 
/** Destroy a data lock iterator previously returned by
create_data_lock_iterator(). */
void Innodb_data_lock_inspector::destroy_data_lock_iterator(
    PSI_engine_data_lock_iterator *it) {
  delete it;
}
422 
/** Destroy a data lock wait iterator previously returned by
create_data_lock_wait_iterator(). */
void Innodb_data_lock_inspector::destroy_data_lock_wait_iterator(
    PSI_engine_data_lock_wait_iterator *it) {
  delete it;
}
427 
428 /** Allocate identifier in performance schema container.
429 @param[in]	container	The container to fill
430 @param[in]	id_str		The identifier string
431 @param[out]	id_length	The identifier string length
432 @returns string allocated in the performance schema container.
433 */
alloc_identifier(PSI_server_data_lock_container * container,std::string & id_str,size_t * id_length)434 const char *alloc_identifier(PSI_server_data_lock_container *container,
435                              std::string &id_str, size_t *id_length) {
436   *id_length = id_str.length();
437   const char *id_name = nullptr;
438 
439   if (*id_length > 0) {
440     id_name = container->cache_data(id_str.c_str(), *id_length);
441   }
442 
443   return (id_name);
444 }
445 
446 /** Parse a table path string.
447 Isolate the table schema, name, partition and sub partition
448 from a table path string.
449 Convert these strings and store them in the performance schema container.
450 @note String returned are not zero terminated.
451 @param[in] container			The container to fill
452 @param[in] table_path			The table path string
453 @param[in] table_path_length		The table path string length
454 @param[out] table_schema		The table schema
455 @param[out] table_schema_length		The table schema length
456 @param[out] table_name			The table name
457 @param[out] table_name_length		The table name length
458 @param[out] partition_name		Partition name
459 @param[out] partition_name_length	Partition name length
460 @param[out] subpartition_name		Sub partition name
461 @param[out] subpartition_name_length	Sub partition name length
462 */
parse_table_path(PSI_server_data_lock_container * container,const char * table_path,size_t table_path_length,const char ** table_schema,size_t * table_schema_length,const char ** table_name,size_t * table_name_length,const char ** partition_name,size_t * partition_name_length,const char ** subpartition_name,size_t * subpartition_name_length)463 void parse_table_path(PSI_server_data_lock_container *container,
464                       const char *table_path, size_t table_path_length,
465                       const char **table_schema, size_t *table_schema_length,
466                       const char **table_name, size_t *table_name_length,
467                       const char **partition_name,
468                       size_t *partition_name_length,
469                       const char **subpartition_name,
470                       size_t *subpartition_name_length) {
471   std::string dict_table(table_path);
472 
473   /* Get schema and table name in system cs. */
474   std::string schema;
475   std::string table;
476   std::string partition;
477   bool is_tmp;
478   dict_name::get_table(dict_table, true, schema, table, partition, is_tmp);
479 
480   std::string part;
481   std::string sub_part;
482   if (!partition.empty()) {
483     ut_ad(dict_name::is_partition(dict_table));
484     /* Get schema partition and sub-partition name in system cs. */
485     dict_name::get_partition(partition, true, part, sub_part);
486   }
487 
488   *table_schema = alloc_identifier(container, schema, table_schema_length);
489   *table_name = alloc_identifier(container, table, table_name_length);
490   *partition_name = alloc_identifier(container, part, partition_name_length);
491   *subpartition_name =
492       alloc_identifier(container, sub_part, subpartition_name_length);
493 }
494 
495 /** Print a table lock id.
496 @param[in]	lock		The lock to print
497 @param[in,out]	lock_id		Printing buffer
498 @param[in]	lock_id_size	Printing buffer length
499 */
print_table_lock_id(const lock_t * lock,char * lock_id,size_t lock_id_size)500 void print_table_lock_id(const lock_t *lock, char *lock_id,
501                          size_t lock_id_size) {
502   /* We try to be backward compatible with INFORMATION_SCHEMA so that one can
503   join with INFORMATION_SCHEMA.innodb_trx.trx_requested_lock_id column */
504   i_s_locks_row_t row;
505   fill_locks_row(&row, lock, ULINT_UNDEFINED);
506   trx_i_s_create_lock_id(&row, lock_id, lock_id_size);
507 }
508 
509 /** Print a record lock id.
510 @param[in]	lock		The lock to print
511 @param[in]	heap_no		Lock heap number
512 @param[in,out]	lock_id		Printing buffer
513 @param[in]	lock_id_size	Printing buffer length
514 */
print_record_lock_id(const lock_t * lock,ulint heap_no,char * lock_id,size_t lock_id_size)515 void print_record_lock_id(const lock_t *lock, ulint heap_no, char *lock_id,
516                           size_t lock_id_size) {
517   /* We try to be backward compatible with INFORMATION_SCHEMA so that one can
518   join with INFORMATION_SCHEMA.innodb_trx.trx_requested_lock_id column */
519   i_s_locks_row_t row;
520   fill_locks_row(&row, lock, heap_no);
521   trx_i_s_create_lock_id(&row, lock_id, lock_id_size);
522 }
523 
524 /** Print a lock id.
525 @param[in]	lock		The lock to print
526 @param[in]	heap_no		Lock heap number
527 @param[in,out]	lock_id		Printing buffer
528 @param[in]	lock_id_size	Printing buffer length
529 */
print_lock_id(const lock_t * lock,ulint heap_no,char * lock_id,size_t lock_id_size)530 void print_lock_id(const lock_t *lock, ulint heap_no, char *lock_id,
531                    size_t lock_id_size) {
532   switch (lock_get_type(lock)) {
533     case LOCK_TABLE:
534       print_table_lock_id(lock, lock_id, lock_id_size);
535       break;
536     case LOCK_REC:
537       print_record_lock_id(lock, heap_no, lock_id, lock_id_size);
538       break;
539     default:
540       ut_error;
541   }
542 }
543 
544 /** Scan a lock id string and extract information necessary to find a row
545 by primary key.
546 @param[in] 	lock_id		        The lock id string to parse
547 @param[out]	trx_immutable_id	The immutable id of lock->trx
548 @param[out]	lock_immutable_id       The immutable id of lock
549 @param[out]	heap_id		        The heap number found, for record locks
550 @returns The type of lock found.
551 @retval LOCK_TABLE	Table lock
552 @retval LOCK_REC	Record lock
553 @retval 0		Format error
554 */
scan_lock_id(const char * lock_id,uint64_t * trx_immutable_id,uint64_t * lock_immutable_id,ulint * heap_id)555 int scan_lock_id(const char *lock_id, uint64_t *trx_immutable_id,
556                  uint64_t *lock_immutable_id, ulint *heap_id) {
557   i_s_locks_row_t row;
558   int lock_type = trx_i_s_parse_lock_id(lock_id, &row);
559   if (!lock_type) {
560     return 0;
561   }
562   *trx_immutable_id = row.lock_trx_immutable_id;
563   *lock_immutable_id = row.lock_immutable_id;
564   if (lock_type == LOCK_REC) {
565     *heap_id = row.lock_rec;
566   }
567   return lock_type;
568 }
569 
Innodb_data_lock_iterator()570 Innodb_data_lock_iterator::Innodb_data_lock_iterator() {}
571 
~Innodb_data_lock_iterator()572 Innodb_data_lock_iterator::~Innodb_data_lock_iterator() {}
573 
/** Scan one batch of data locks into the container.
Implements the restartable batch scan described at the top of this file:
each call reports the locks of (up to) one SCAN_RANGE of transactions,
and m_scan_state remembers where to resume on the next call.
@param[in] container	The container to fill
@param[in] with_lock_data	True if column LOCK_DATA needs to be populated
@return true when the scan is complete (or the engine was rejected),
false when more batches remain */
bool Innodb_data_lock_iterator::scan(PSI_server_data_lock_container *container,
                                     bool with_lock_data) {
  /* On the very first call, give the container a chance to filter out
  the INNODB engine entirely, before taking any latches. */
  if (m_scan_state.get_pass() == INIT_SCANNING) {
    if (!container->accept_engine(g_engine, g_engine_length)) {
      return true;
    }

    m_scan_state.prepare_next_scan();
  }

  if (m_scan_state.get_pass() == DONE_SCANNING) {
    return true;
  }

  /* We want locks reported in a single scan to be a consistent snapshot. */
  locksys::Global_exclusive_latch_guard guard{};

  /* Latch order: the global lock_sys latch (above) is taken before the
  trx_sys mutex; the mutex is released below before returning. */
  trx_sys_mutex_enter();

  size_t found = 0;

  /* Keep advancing batches within the RW pass until a batch yields
  records, or the pass is exhausted. */
  while ((m_scan_state.get_pass() == SCANNING_RW_TRX_LIST) && (found == 0)) {
    found =
        scan_trx_list(container, with_lock_data, true, &trx_sys->rw_trx_list);
    m_scan_state.prepare_next_scan();
  }

  /* Same for the MySQL trx list pass; skipped while the RW pass
  produced records in this call. */
  while ((m_scan_state.get_pass() == SCANNING_MYSQL_TRX_LIST) && (found == 0)) {
    found = scan_trx_list(container, with_lock_data, false,
                          &trx_sys->mysql_trx_list);
    m_scan_state.prepare_next_scan();
  }

  trx_sys_mutex_exit();

  /* false: caller must invoke scan() again for the next batch. */
  return false;
}
611 
/** Fetch a single data lock, identified by its engine lock id.
Parses the lock id, locates the owning transaction in the RW list first
and the MySQL list second, then reports the matching lock row(s).
@param[in] container	The container to fill
@param[in] engine_lock_id	The lock id string to look up
@param[in] engine_lock_id_length	Length of the lock id string (unused;
the id is parsed as a NUL-terminated string)
@param[in] with_lock_data	True if column LOCK_DATA needs to be populated
@return true always: a fetch is a single-shot operation */
bool Innodb_data_lock_iterator::fetch(PSI_server_data_lock_container *container,
                                      const char *engine_lock_id,
                                      size_t engine_lock_id_length,
                                      bool with_lock_data) {
  int record_type;
  uint64_t trx_immutable_id;
  ulint heap_id;
  uint64_t lock_immutable_id;
  const trx_t *trx;

  if (!container->accept_engine(g_engine, g_engine_length)) {
    return true;
  }

  /* Extract the primary-key parts (trx id, lock id, heap no)
  from the printable lock id. */
  record_type = scan_lock_id(engine_lock_id, &trx_immutable_id,
                             &lock_immutable_id, &heap_id);

  /* 0 means the lock id string was malformed: nothing to report. */
  if (record_type == 0) {
    return true;
  }

  /* scan_trx() requires exclusive global latch to iterate over locks of trx */
  locksys::Global_exclusive_latch_guard guard{};

  /* Latch order: global lock_sys latch first, trx_sys mutex second. */
  trx_sys_mutex_enter();

  /* Look in the RW trx list first, then fall back to the MySQL list. */
  trx = fetch_trx_in_trx_list(trx_immutable_id, true, &trx_sys->rw_trx_list);

  if (trx == nullptr) {
    trx = fetch_trx_in_trx_list(trx_immutable_id, false,
                                &trx_sys->mysql_trx_list);
  }

  if (trx != nullptr) {
    /* with_filter = true: only the lock matching the parsed id is added. */
    scan_trx(container, with_lock_data, trx, true, lock_immutable_id, heap_id);
  }

  trx_sys_mutex_exit();

  return true;
}
653 
654 /** Scan a trx list.
655 @param[in] container		The container to fill
656 @param[in] with_lock_data	True if column LOCK_DATA
657 needs to be populated.
658 @param[in] read_write		True if the trx list is the RW list
659 @param[in] trx_list		The trx list to scan
660 @returns The number of records found
661 */
scan_trx_list(PSI_server_data_lock_container * container,bool with_lock_data,bool read_write,trx_ut_list_t * trx_list)662 size_t Innodb_data_lock_iterator::scan_trx_list(
663     PSI_server_data_lock_container *container, bool with_lock_data,
664     bool read_write, trx_ut_list_t *trx_list) {
665   const trx_t *trx;
666   trx_id_t trx_id;
667   size_t found = 0;
668 
669   /* We are about to scan over various locks of multiple transactions not
670   limited to any particular shard thus we need an exclusive latch on lock_sys */
671   ut_ad(locksys::owns_exclusive_global_latch());
672   ut_ad(trx_sys_mutex_own());
673 
674   for (trx = UT_LIST_GET_FIRST(*trx_list); trx != nullptr;
675        trx = get_next_trx(trx, read_write)) {
676     if (discard_trx(trx, read_write)) {
677       continue;
678     }
679 
680     trx_id = trx_get_id_for_print(trx);
681 
682     if (!m_scan_state.trx_id_in_range(trx_id)) {
683       continue;
684     }
685 
686     found += scan_trx(container, with_lock_data, trx, false);
687   }
688 
689   return found;
690 }
691 
/** Scan a given trx.
Either scan all the locks for a transaction,
or scan only records matching a given lock.
Each container->accept_*() call gives the performance schema a chance
to filter rows early, before the expensive work of building them.
@param[in] container		      The container to fill
@param[in] with_lock_data	      True if column LOCK_DATA
needs to be populated.
@param[in] trx			      The trx to scan
@param[in] with_filter		      True if looking for a specific
record only.
@param[in] filter_lock_immutable_id   Immutable id of lock_t we are looking for
@param[in] filter_heap_id	      Heap id to look for, when filtering
@returns The number of records found
*/
size_t Innodb_data_lock_iterator::scan_trx(
    PSI_server_data_lock_container *container, bool with_lock_data,
    const trx_t *trx, bool with_filter, uint64_t filter_lock_immutable_id,
    ulint filter_heap_id) {
  assert_trx_nonlocking_or_in_list(trx);

  size_t found = 0;
  const lock_t *lock;
  ulonglong trx_id;
  ulonglong thread_id;
  ulonglong event_id;
  const char *table_path;
  const char *table_schema;
  size_t table_schema_length;
  const char *table_name;
  size_t table_name_length;
  const char *partition_name;
  size_t partition_name_length;
  const char *subpartition_name;
  size_t subpartition_name_length;
  const char *index_name;
  size_t index_name_length;
  const void *identity;
  const char *lock_mode_str;
  const char *lock_type_str;
  const char *lock_status_str = "GRANTED";
  const char *lock_data_str;
  char engine_lock_id[TRX_I_S_LOCK_ID_MAX_LEN + 1];
  size_t engine_lock_id_length;
  ulint heap_no;
  int record_type;
  lock_t *wait_lock;
  /* Iterating the trx lock list requires the exclusive lock_sys latch. */
  ut_ad(locksys::owns_exclusive_global_latch());
  wait_lock = trx->lock.wait_lock;

  trx_id = trx_get_id_for_print(trx);

  if (!container->accept_transaction_id(trx_id)) {
    return 0;
  }

  for (lock = lock_get_first_trx_locks(&trx->lock); lock != nullptr;
       lock = lock_get_next_trx_locks(lock)) {
    record_type = lock_get_type(lock);

    /* In filter mode (fetch by lock id), skip every other lock. */
    if (with_filter &&
        filter_lock_immutable_id != lock_get_immutable_id(lock)) {
      continue;
    }

    lock_get_psi_event(lock, &thread_id, &event_id);

    if (!container->accept_thread_id_event_id(thread_id, event_id)) {
      continue;
    }

    /* Split the dictionary table path into schema / table / partition /
    sub-partition pieces, cached in the container. */
    table_path = lock_get_table_name(lock).m_name;
    parse_table_path(container, table_path, strlen(table_path), &table_schema,
                     &table_schema_length, &table_name, &table_name_length,
                     &partition_name, &partition_name_length,
                     &subpartition_name, &subpartition_name_length);

    if (!container->accept_object(table_schema, table_schema_length, table_name,
                                  table_name_length, partition_name,
                                  partition_name_length, subpartition_name,
                                  subpartition_name_length)) {
      continue;
    }

    identity = lock;
    lock_mode_str = lock_get_mode_str(lock);
    lock_type_str = lock_get_type_str(lock);

    /* NOTE(review): lock_status_str is never reset to "GRANTED" once set;
    this looks like it relies on the wait lock being the last lock in the
    trx lock list — confirm. */
    if (lock == wait_lock) {
      lock_status_str = "WAITING";
    }

    switch (record_type) {
      case LOCK_TABLE:
        /* A table lock maps to exactly one row. */
        print_table_lock_id(lock, engine_lock_id, sizeof(engine_lock_id));
        engine_lock_id_length = strlen(engine_lock_id);

        if (container->accept_lock_id(engine_lock_id, engine_lock_id_length)) {
          container->add_lock_row(
              g_engine, g_engine_length, engine_lock_id, engine_lock_id_length,
              trx_id, thread_id, event_id, table_schema, table_schema_length,
              table_name, table_name_length, partition_name,
              partition_name_length, subpartition_name,
              subpartition_name_length, nullptr, 0, identity, lock_mode_str,
              lock_type_str, lock_status_str, nullptr);
          found++;
        }
        break;
      case LOCK_REC:
        index_name = lock_rec_get_index_name(lock);
        index_name_length = strlen(index_name);

        /* A record lock bitmap can cover several rows on a page:
        report one row per heap number set in the bitmap. */
        heap_no = lock_rec_find_set_bit(lock);

        while (heap_no != ULINT_UNDEFINED) {
          if (!with_filter || (heap_no == filter_heap_id)) {
            print_record_lock_id(lock, heap_no, engine_lock_id,
                                 sizeof(engine_lock_id));
            engine_lock_id_length = strlen(engine_lock_id);

            if (container->accept_lock_id(engine_lock_id,
                                          engine_lock_id_length)) {
              /* LOCK_DATA is expensive to build (reads the record),
              so it is only produced on demand. */
              if (with_lock_data) {
                p_s_fill_lock_data(&lock_data_str, lock, heap_no, container);
              } else {
                lock_data_str = nullptr;
              }

              container->add_lock_row(
                  g_engine, g_engine_length, engine_lock_id,
                  engine_lock_id_length, trx_id, thread_id, event_id,
                  table_schema, table_schema_length, table_name,
                  table_name_length, partition_name, partition_name_length,
                  subpartition_name, subpartition_name_length, index_name,
                  index_name_length, identity, lock_mode_str, lock_type_str,
                  lock_status_str, lock_data_str);
              found++;
            }
          }

          heap_no = lock_rec_find_next_set_bit(lock, heap_no);
        }
        break;
      default:
        ut_error;
    }
  }

  return found;
}
840 
Innodb_data_lock_wait_iterator()841 Innodb_data_lock_wait_iterator::Innodb_data_lock_wait_iterator()
842 
843 {}
844 
~Innodb_data_lock_wait_iterator()845 Innodb_data_lock_wait_iterator::~Innodb_data_lock_wait_iterator() {}
846 
scan(PSI_server_data_lock_wait_container * container)847 bool Innodb_data_lock_wait_iterator::scan(
848     PSI_server_data_lock_wait_container *container) {
849   if (m_scan_state.get_pass() == INIT_SCANNING) {
850     if (!container->accept_engine(g_engine, g_engine_length)) {
851       return true;
852     }
853 
854     m_scan_state.prepare_next_scan();
855   }
856 
857   if (m_scan_state.get_pass() == DONE_SCANNING) {
858     return true;
859   }
860 
861   /* We want locks reported in a single scan to be a consistent snapshot. */
862   locksys::Global_exclusive_latch_guard guard{};
863 
864   trx_sys_mutex_enter();
865 
866   size_t found = 0;
867 
868   while ((m_scan_state.get_pass() == SCANNING_RW_TRX_LIST) && (found == 0)) {
869     found = scan_trx_list(container, true, &trx_sys->rw_trx_list);
870     m_scan_state.prepare_next_scan();
871   }
872 
873   while ((m_scan_state.get_pass() == SCANNING_MYSQL_TRX_LIST) && (found == 0)) {
874     found = scan_trx_list(container, false, &trx_sys->mysql_trx_list);
875     m_scan_state.prepare_next_scan();
876   }
877 
878   trx_sys_mutex_exit();
879 
880   return false;
881 }
882 
fetch(PSI_server_data_lock_wait_container * container,const char * requesting_engine_lock_id,size_t requesting_engine_lock_id_length,const char * blocking_engine_lock_id,size_t blocking_engine_lock_id_length)883 bool Innodb_data_lock_wait_iterator::fetch(
884     PSI_server_data_lock_wait_container *container,
885     const char *requesting_engine_lock_id,
886     size_t requesting_engine_lock_id_length,
887     const char *blocking_engine_lock_id,
888     size_t blocking_engine_lock_id_length) {
889   int requesting_record_type;
890   uint64_t requesting_trx_immutable_id;
891   ulint requesting_heap_id;
892   uint64_t requesting_lock_immutable_id;
893   int blocking_record_type;
894   uint64_t blocking_trx_immutable_id;
895   ulint blocking_heap_id;
896   uint64_t blocking_lock_immutable_id;
897   const trx_t *trx;
898 
899   if (!container->accept_engine(g_engine, g_engine_length)) {
900     return true;
901   }
902 
903   requesting_record_type =
904       scan_lock_id(requesting_engine_lock_id, &requesting_trx_immutable_id,
905                    &requesting_lock_immutable_id, &requesting_heap_id);
906 
907   if (requesting_record_type == 0) {
908     return true;
909   }
910 
911   blocking_record_type =
912       scan_lock_id(blocking_engine_lock_id, &blocking_trx_immutable_id,
913                    &blocking_lock_immutable_id, &blocking_heap_id);
914 
915   if (blocking_record_type == 0) {
916     return true;
917   }
918 
919   /* scan_trx() requires exclusive global latch to iterate over locks of trx */
920   locksys::Global_exclusive_latch_guard guard{};
921 
922   trx_sys_mutex_enter();
923 
924   trx = fetch_trx_in_trx_list(requesting_trx_immutable_id, true,
925                               &trx_sys->rw_trx_list);
926 
927   if (trx == nullptr) {
928     trx = fetch_trx_in_trx_list(requesting_trx_immutable_id, false,
929                                 &trx_sys->mysql_trx_list);
930   }
931 
932   if (trx != nullptr) {
933     scan_trx(container, trx, true, requesting_lock_immutable_id,
934              blocking_lock_immutable_id);
935   }
936 
937   trx_sys_mutex_exit();
938 
939   return true;
940 }
941 
942 /** Scan a given transaction list.
943 @param[in] container		The container to fill
944 @param[in] read_write		True if the transaction list is the RW list.
945 @param[in] trx_list		The trx list to scan
946 @returns the number of records found.
947 */
scan_trx_list(PSI_server_data_lock_wait_container * container,bool read_write,trx_ut_list_t * trx_list)948 size_t Innodb_data_lock_wait_iterator::scan_trx_list(
949     PSI_server_data_lock_wait_container *container, bool read_write,
950     trx_ut_list_t *trx_list) {
951   const trx_t *trx;
952   trx_id_t trx_id;
953   size_t found = 0;
954 
955   /* We are about to scan over various locks of multiple transactions not
956   limited to any particular shard thus we need an exclusive latch on lock_sys */
957   ut_ad(locksys::owns_exclusive_global_latch());
958   ut_ad(trx_sys_mutex_own());
959 
960   for (trx = UT_LIST_GET_FIRST(*trx_list); trx != nullptr;
961        trx = get_next_trx(trx, read_write)) {
962     if (discard_trx(trx, read_write)) {
963       continue;
964     }
965 
966     trx_id = trx_get_id_for_print(trx);
967 
968     if (!m_scan_state.trx_id_in_range(trx_id)) {
969       continue;
970     }
971 
972     found += scan_trx(container, trx, false);
973   }
974 
975   return found;
976 }
977 
/** Scan a given transaction.
Either scan all the waits for a transaction,
or scan only records matching a given wait.
@param[in] container		The container to fill
@param[in] trx			The trx to scan
@param[in] with_filter		True if looking for a given wait only.
@param[in] filter_requesting_lock_immutable_id		Immutable id of
lock_t for the requesting lock, when filtering
@param[in] filter_blocking_lock_immutable_id		Immutable id of
lock_t for the blocking lock, when filtering
@returns the number of records found.
*/
size_t Innodb_data_lock_wait_iterator::scan_trx(
    PSI_server_data_lock_wait_container *container, const trx_t *trx,
    bool with_filter, uint64_t filter_requesting_lock_immutable_id,
    uint64_t filter_blocking_lock_immutable_id) {
  assert_trx_nonlocking_or_in_list(trx);

  /* A transaction contributes lock wait rows only while it is actually
  waiting for a lock. */
  if (trx->lock.que_state != TRX_QUE_LOCK_WAIT) {
    return 0;
  }

  ulonglong requesting_trx_id;
  ulonglong requesting_thread_id;
  ulonglong requesting_event_id;
  const void *requesting_identity;
  char requesting_engine_lock_id[TRX_I_S_LOCK_ID_MAX_LEN + 1];
  size_t requesting_engine_lock_id_length;
  ulonglong blocking_trx_id;
  ulonglong blocking_thread_id;
  ulonglong blocking_event_id;
  const void *blocking_identity;
  char blocking_engine_lock_id[TRX_I_S_LOCK_ID_MAX_LEN + 1];
  size_t blocking_engine_lock_id_length;
  /* The lock queue iteration below may touch locks from arbitrary
  lock_sys shards, so the exclusive global latch must be held. */
  ut_ad(locksys::owns_exclusive_global_latch());
  lock_t *wait_lock = trx->lock.wait_lock;
  const lock_t *curr_lock;
  int requesting_record_type;
  size_t found = 0;
  lock_queue_iterator_t iter;

  /* TRX_QUE_LOCK_WAIT implies a wait lock is set. */
  ut_a(wait_lock != nullptr);

  requesting_record_type = lock_get_type(wait_lock);

  if (with_filter &&
      lock_get_immutable_id(wait_lock) != filter_requesting_lock_immutable_id) {
    return 0;
  }

  /* Let the container reject the row as early as possible, before the
  more expensive lock id formatting further down. */
  requesting_trx_id = trx_get_id_for_print(trx);
  if (!container->accept_requesting_transaction_id(requesting_trx_id)) {
    return 0;
  }

  lock_get_psi_event(wait_lock, &requesting_thread_id, &requesting_event_id);
  if (!container->accept_requesting_thread_id_event_id(requesting_thread_id,
                                                       requesting_event_id)) {
    return 0;
  }

  /* For record locks the lock id embeds the heap number of the waited-on
  record; table locks use heap_no = 0. */
  ulint heap_no = 0;
  if (requesting_record_type == LOCK_REC) {
    heap_no = lock_rec_find_set_bit(wait_lock);
  }

  print_lock_id(wait_lock, heap_no, requesting_engine_lock_id,
                sizeof(requesting_engine_lock_id));
  requesting_engine_lock_id_length = strlen(requesting_engine_lock_id);
  if (!container->accept_requesting_lock_id(requesting_engine_lock_id,
                                            requesting_engine_lock_id_length)) {
    return 0;
  }

  requesting_identity = wait_lock;
  /* Walk the locks queued ahead of the wait lock: each of them is a
  candidate blocker. */
  lock_queue_iterator_reset(&iter, wait_lock, ULINT_UNDEFINED);

  for (curr_lock = lock_queue_iterator_get_prev(&iter); curr_lock != nullptr;
       curr_lock = lock_queue_iterator_get_prev(&iter)) {
    if (with_filter &&
        lock_get_immutable_id(curr_lock) != filter_blocking_lock_immutable_id) {
      continue;
    }

    /* Only locks the wait lock actually conflicts with are blockers. */
    if (lock_has_to_wait(wait_lock, curr_lock)) {
      blocking_trx_id = lock_get_trx_id(curr_lock);
      if (!container->accept_blocking_transaction_id(blocking_trx_id)) {
        continue;
      }

      lock_get_psi_event(curr_lock, &blocking_thread_id, &blocking_event_id);
      if (!container->accept_blocking_thread_id_event_id(blocking_thread_id,
                                                         blocking_event_id)) {
        continue;
      }

      blocking_identity = curr_lock;
      /* NOTE(review): heap_no here was derived from wait_lock above;
      presumably the blocking lock covers the same record heap number
      since both sit in the same lock queue — confirm. */
      print_lock_id(curr_lock, heap_no, blocking_engine_lock_id,
                    sizeof(blocking_engine_lock_id));
      blocking_engine_lock_id_length = strlen(blocking_engine_lock_id);
      if (!container->accept_blocking_lock_id(blocking_engine_lock_id,
                                              blocking_engine_lock_id_length)) {
        continue;
      }

      container->add_lock_wait_row(
          g_engine, g_engine_length, requesting_engine_lock_id,
          requesting_engine_lock_id_length, requesting_trx_id,
          requesting_thread_id, requesting_event_id, requesting_identity,
          blocking_engine_lock_id, blocking_engine_lock_id_length,
          blocking_trx_id, blocking_thread_id, blocking_event_id,
          blocking_identity);
      found++;
    }
  }

  return found;
}
1096