1 /* Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
2 
3   This program is free software; you can redistribute it and/or modify
4   it under the terms of the GNU General Public License, version 2.0,
5   as published by the Free Software Foundation.
6 
7   This program is also distributed with certain software (including
8   but not limited to OpenSSL) that is licensed under separate terms,
9   as designated in a particular file or component or in included license
10   documentation.  The authors of MySQL hereby grant you an additional
11   permission to link the program and your derivative works with the
12   separately licensed software that they have included with MySQL.
13 
14   This program is distributed in the hope that it will be useful,
15   but WITHOUT ANY WARRANTY; without even the implied warranty of
16   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   GNU General Public License, version 2.0, for more details.
18 
19   You should have received a copy of the GNU General Public License
20   along with this program; if not, write to the Free Software Foundation,
21   51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
22 
23 /**
24   @file storage/perfschema/pfs.cc
25   The performance schema implementation of all instruments.
26 */
27 #include "my_global.h"
28 #include "thr_lock.h"
29 #include "mysql/psi/psi.h"
30 #include "mysql/psi/mysql_thread.h"
31 #include "my_pthread.h"
32 #include "sql_const.h"
33 #include "pfs.h"
34 #include "pfs_instr_class.h"
35 #include "pfs_instr.h"
36 #include "pfs_host.h"
37 #include "pfs_user.h"
38 #include "pfs_account.h"
39 #include "pfs_global.h"
40 #include "pfs_column_values.h"
41 #include "pfs_timer.h"
42 #include "pfs_events_waits.h"
43 #include "pfs_events_stages.h"
44 #include "pfs_events_statements.h"
45 #include "pfs_setup_actor.h"
46 #include "pfs_setup_object.h"
47 #include "sql_error.h"
48 #include "sp_head.h"
49 #include "pfs_digest.h"
50 
51 /**
52   @page PAGE_PERFORMANCE_SCHEMA The Performance Schema main page
53   MySQL PERFORMANCE_SCHEMA implementation.
54 
55   @section INTRO Introduction
56   The PERFORMANCE_SCHEMA is a way to introspect the internal execution of
57   the server at runtime.
58   The performance schema focuses primarily on performance data,
59   as opposed to the INFORMATION_SCHEMA whose purpose is to inspect metadata.
60 
61   From a user point of view, the performance schema consists of:
62   - a dedicated database schema, named PERFORMANCE_SCHEMA,
63   - SQL tables, used to query the server internal state or change
64   configuration settings.
65 
66   From an implementation point of view, the performance schema is a dedicated
67   Storage Engine which exposes data collected by 'Instrumentation Points'
68   placed in the server code.
69 
70   @section INTERFACES Multiple interfaces
71 
72   The performance schema exposes many different interfaces,
73   for different components, and for different purposes.
74 
75   @subsection INT_INSTRUMENTING Instrumenting interface
76 
77   All the data representing the server internal state exposed
78   in the performance schema must be first collected:
79   this is the role of the instrumenting interface.
80   The instrumenting interface is a coding interface provided
81   by implementors (of the performance schema) to implementors
82   (of the server or server components).
83 
84   This interface is available to:
85   - C implementations
86   - C++ implementations
87   - the core SQL layer (/sql)
88   - the mysys library (/mysys)
89   - MySQL plugins, including storage engines,
90   - third party plugins, including third party storage engines.
91 
92   For details, see the @ref PAGE_INSTRUMENTATION_INTERFACE
93   "instrumentation interface page".
94 
95   @subsection INT_COMPILING Compiling interface
96 
97   The implementation of the performance schema can be enabled or disabled at
98   build time, when building MySQL from the source code.
99 
100   When building with the performance schema code, some compilation flags
101   are available to change the default values used in the code, if required.
102 
103   For more details, see:
104   @verbatim ./configure --help @endverbatim
105 
106   To compile with the performance schema:
107   @verbatim ./configure --with-perfschema @endverbatim
108 
109   The implementation of all the compiling options is located in
110   @verbatim ./storage/perfschema/plug.in @endverbatim
111 
112   @subsection INT_STARTUP Server startup interface
113 
114   The server startup interface consists of the "./mysqld ..."
115   command line used to start the server.
116   When the performance schema is compiled in the server binary,
117   extra command line options are available.
118 
119   These extra start options allow the DBA to:
120   - enable or disable the performance schema
121   - specify some sizing parameters.
122 
123   To see help for the performance schema startup options, see:
124   @verbatim ./sql/mysqld --verbose --help  @endverbatim
125 
126   The implementation of all the startup options is located in
127   @verbatim ./sql/mysqld.cc, my_long_options[] @endverbatim
128 
129   @subsection INT_BOOTSTRAP Server bootstrap interface
130 
131   The bootstrap interface is a private interface exposed by
132   the performance schema, and used by the SQL layer.
133   Its role is to advertise all the SQL tables natively
134   supported by the performance schema to the SQL server.
135   The code consists of creating MySQL tables for the
136   performance schema itself, and is used in './mysqld --bootstrap'
137   mode when a server is installed.
138 
139   The implementation of the database creation script is located in
140   @verbatim ./scripts/mysql_performance_tables.sql @endverbatim
141 
142   @subsection INT_CONFIG Runtime configuration interface
143 
144   When the performance schema is used at runtime, various configuration
145   parameters can be used to specify what kind of data is collected,
146   what kind of aggregations are computed, what kind of timers are used,
147   what events are timed, etc.
148 
149   For all these capabilities, not a single statement or special syntax
150   was introduced in the parser.
151   Instead of new SQL statements, the interface consists of DML
152   (SELECT, INSERT, UPDATE, DELETE) against special "SETUP" tables.
153 
154   For example:
155   @verbatim mysql> update performance_schema.SETUP_INSTRUMENTS
156     set ENABLED='YES', TIMED='YES';
157   Query OK, 234 rows affected (0.00 sec)
158   Rows matched: 234  Changed: 234  Warnings: 0 @endverbatim
159 
160   @subsection INT_STATUS Internal audit interface
161 
162   The internal audit interface is provided to the DBA to inspect if the
163   performance schema code itself is functioning properly.
164   This interface is necessary because a failure caused while
165   instrumenting code in the server should not cause failures in the
166   MySQL server itself, so that the performance schema implementation
167   never raises errors during runtime execution.
168 
169   This auditing interface consists of:
170   @verbatim SHOW ENGINE PERFORMANCE_SCHEMA STATUS; @endverbatim
171   It displays data related to the memory usage of the performance schema,
172   as well as statistics about lost events, if any.
173 
174   The SHOW ENGINE PERFORMANCE_SCHEMA STATUS command is implemented in
175   @verbatim ./storage/perfschema/pfs_engine_table.cc @endverbatim
176 
177   @subsection INT_QUERY Query interface
178 
179   The query interface is used to query the internal state of a running server.
180   It is provided as SQL tables.
181 
182   For example:
183   @verbatim mysql> select * from performance_schema.EVENTS_WAITS_CURRENT;
184   @endverbatim
185 
186   @section DESIGN_PRINCIPLES Design principles
187 
188   @subsection PRINCIPLE_BEHAVIOR No behavior changes
189 
190   The primary goal of the performance schema is to measure (instrument) the
191   execution of the server. A good measure should not cause any change
192   in behavior.
193 
194   To achieve this, the overall design of the performance schema complies
195   with the following very severe design constraints:
196 
197   The parser is unchanged. There are no new keywords, no new statements.
198   This guarantees that existing applications will run the same way with or
199   without the performance schema.
200 
201   All the instrumentation points return "void", there are no error codes.
202   Even if the performance schema internally fails, execution of the server
203   code will proceed.
204 
205   None of the instrumentation points allocate memory.
206   All the memory used by the performance schema is pre-allocated at startup,
207   and is considered "static" during the server life time.
208 
209   None of the instrumentation points use any pthread_mutex, pthread_rwlock,
210   or pthread_cond (or platform equivalents).
211   Executing the instrumentation point should not cause thread scheduling to
212   change in the server.
213 
214   In other words, the implementation of the instrumentation points,
215   including all the code called by the instrumentation points, is:
216   - malloc free
217   - mutex free
218   - rwlock free
219 
220   TODO: All the code located in storage/perfschema is malloc free,
221   but unfortunately the usage of LF_HASH introduces some memory allocation.
222   This should be revised if possible, to use a lock-free,
223   malloc-free hash code table.
224 
225   @subsection PRINCIPLE_PERFORMANCE No performance hit
226 
227   The instrumentation of the server should be as fast as possible.
228   In cases when there are choices between:
229   - doing some processing when recording the performance data
230   in the instrumentation,
231   - doing some processing when retrieving the performance data,
232 
233   priority is given in the design to make the instrumentation faster,
234   pushing some complexity to data retrieval.
235 
236   As a result, some parts of the design, related to:
237   - the setup code path,
238   - the query code path,
239 
240   might appear to be sub-optimal.
241 
242   The criterion used here is to optimize primarily the critical path (data
243   collection), possibly at the expense of non-critical code paths.
244 
245   @subsection PRINCIPLE_NOT_INTRUSIVE Unintrusive instrumentation
246 
247   For the performance schema in general to be successful, the barrier
248   of entry for a developer should be low, so it's easy to instrument code.
249 
250   In particular, the instrumentation interface:
251   - is available for C and C++ code (so it's a C interface),
252   - does not require parameters that the calling code can't easily provide,
253   - supports partial instrumentation (for example, instrumenting mutexes does
254   not require that every mutex is instrumented)
255 
256   @subsection PRINCIPLE_EXTENDABLE Extendable instrumentation
257 
258   As the content of the performance schema improves,
259   with more tables exposed and more data collected,
260   the instrumentation interface will also be augmented
261   to support instrumenting new concepts.
262   Existing instrumentations should not be affected when additional
263   instrumentation is made available, and making a new instrumentation
264   available should not require existing instrumented code to support it.
265 
266   @subsection PRINCIPLE_VERSIONED Versioned instrumentation
267 
268   Given that the instrumentation offered by the performance schema will
269   be augmented with time, when more features are implemented,
270   the interface itself should be versioned, to keep compatibility
271   with previous instrumented code.
272 
273   For example, after both plugin-A and plugin-B have been instrumented for
274   mutexes, read write locks and conditions, using the instrumentation
275   interface, we can anticipate that the instrumentation interface
276   is expanded to support file based operations.
277 
278   Plugin-A, a file based storage engine, will most likely use the expanded
279   interface and instrument its file usage, using the version 2
280   interface, while Plugin-B, a network based storage engine, will not change
281   its code and not release a new binary.
282 
283   When later the instrumentation interface is expanded to support network
284   based operations (which will define interface version 3), the Plugin-B code
285   can then be changed to make use of it.
286 
287   Note, this is just an example to illustrate the design concept here.
288   Both mutexes and file instrumentation are already available
289   since version 1 of the instrumentation interface.
290 
291   @subsection PRINCIPLE_DEPLOYMENT Easy deployment
292 
293   Internally, we might want every plugin implementation to upgrade the
294   instrumented code to the latest available, but this will cause additional
295   work and this is not practical if the code change is monolithic.
296 
297   Externally, for third party plugin implementors, asking implementors to
298   always stay aligned to the latest instrumentation and make new releases,
299   even when the change does not provide new functionality for them,
300   is a bad idea.
301 
302   For example, requiring a network based engine to re-release because the
303   instrumentation interface changed for file based operations, will create
304   too many deployment issues.
305 
306   So, the performance schema implementation must support concurrently,
307   in the same deployment, multiple versions of the instrumentation
308   interface, and ensure binary compatibility with each version.
309 
310   In addition to this, the performance schema can be included or excluded
311   from the server binary, using build time configuration options.
312 
313   Regardless, the following types of deployment are valid:
314   - a server supporting the performance schema + a storage engine
315   that is not instrumented
316   - a server not supporting the performance schema + a storage engine
317   that is instrumented
318 */
319 
320 /**
321   @page PAGE_INSTRUMENTATION_INTERFACE Performance schema: instrumentation interface page.
322   MySQL performance schema instrumentation interface.
323 
324   @section INTRO Introduction
325 
326   The instrumentation interface consists of two layers:
327   - a raw ABI (Application Binary Interface) layer, that exposes the primitive
328   instrumentation functions exported by the performance schema instrumentation
329   - an API (Application Programming Interface) layer,
330   that provides many helpers for a developer instrumenting some code,
331   to make the instrumentation as easy as possible.
332 
333   The ABI layer consists of:
334 @code
335 #include "mysql/psi/psi.h"
336 @endcode
337 
338   The API layer consists of:
339 @code
340 #include "mysql/psi/mysql_thread.h"
341 #include "mysql/psi/mysql_file.h"
342 @endcode
343 
344   The first helper is for mutexes, rwlocks and conditions,
345   the second for file io.
346 
347   The API layer exposes C macros and typedefs which will expand:
348   - either to non-instrumented code, when compiled without the performance
349   schema instrumentation
350   - or to instrumented code, that will issue the raw calls to the ABI layer
351   so that the implementation can collect data.
352 
353   Note that all the names introduced (for example, @c mysql_mutex_lock) do not
354   collide with any other namespace.
355   In particular, the macro @c mysql_mutex_lock is on purpose not named
356   @c pthread_mutex_lock.
357   This is to:
358   - avoid overloading @c pthread_mutex_lock with yet another macro,
359   which is dangerous as it can affect user code and pollute
360   the end-user namespace.
361   - allow the developer instrumenting code to selectively instrument
362   some code but not all.
363 
364   @section PRINCIPLES Design principles
365 
366   The ABI part is designed as a facade, that exposes basic primitives.
367   The expectation is that each primitive will be very stable over time,
368   but the list will constantly grow when more instruments are supported.
369   To support binary compatibility with plugins compiled with a different
370   version of the instrumentation, the ABI itself is versioned
371   (see @c PSI_v1, @c PSI_v2).
372 
373   For a given instrumentation point in the API, the basic coding pattern
374   used is:
375   - (a) notify the performance schema of the operation
376   about to be performed.
377   - (b) execute the instrumented code.
378   - (c) notify the performance schema that the operation
379   is completed.
380 
381   An opaque "locker" pointer is returned by (a), that is given to (c).
382   This pointer helps the implementation to keep context, for performance.
383 
384   The following code fragment is annotated to show in detail how this pattern
385   is implemented, when the instrumentation is compiled in:
386 
387 @verbatim
388 static inline int mysql_mutex_lock(
389   mysql_mutex_t *that, myf flags, const char *src_file, uint src_line)
390 {
391   int result;
392   struct PSI_mutex_locker_state state;
393   struct PSI_mutex_locker *locker= NULL;
394 
395   ............... (a)
396   locker= PSI_server->start_mutex_wait(&state, that->p_psi,
397                                        PSI_MUTEX_LOCK, locker, src_file, src_line);
398 
399   ............... (b)
400   result= pthread_mutex_lock(&that->m_mutex);
401 
402   ............... (c)
403   PSI_server->end_mutex_wait(locker, result);
404 
405   return result;
406 }
407 @endverbatim
408 
409   When the performance schema instrumentation is not compiled in,
410   the code becomes simply a wrapper, expanded in line by the compiler:
411 
412 @verbatim
413 static inline int mysql_mutex_lock(...)
414 {
415   int result;
416 
417   ............... (b)
418   result= pthread_mutex_lock(&that->m_mutex);
419 
420   return result;
421 }
422 @endverbatim
423 */
424 
425 /**
426   @page PAGE_AGGREGATES Performance schema: the aggregates page.
427   Performance schema aggregates.
428 
429   @section INTRO Introduction
430 
431   Aggregates tables are tables that can be formally defined as
432   SELECT ... from EVENTS_WAITS_HISTORY_INFINITE ... group by 'group clause'.
433 
434   Each group clause defines a different kind of aggregate, and corresponds to
435   a different table exposed by the performance schema.
436 
437   Aggregates can be either:
438   - computed on the fly,
439   - computed on demand, based on other available data.
440 
441   'EVENTS_WAITS_HISTORY_INFINITE' is a table that does not exist,
442   the best approximation is EVENTS_WAITS_HISTORY_LONG.
443   Aggregates computed on the fly in fact are based on EVENTS_WAITS_CURRENT,
444   while aggregates computed on demand are based on other
445   EVENTS_WAITS_SUMMARY_BY_xxx tables.
446 
447   To better understand the implementation itself, a bit of math is
448   required first, to understand the model behind the code:
449   the code is deceptively simple, the real complexity resides
450   in the flyweight of pointers between various performance schema buffers.
451 
452   @section DIMENSION Concept of dimension
453 
454   An event measured by the instrumentation has many attributes.
455   An event is represented as a data point P(x1, x2, ..., xN),
456   where each x_i coordinate represents a given attribute value.
457 
458   Examples of attributes are:
459   - the time waited
460   - the object waited on
461   - the instrument waited on
462   - the thread that waited
463   - the operation performed
464   - per object or per operation additional attributes, such as spins,
465   number of bytes, etc.
466 
467   Computing an aggregate per thread is fundamentally different from
468   computing an aggregate by instrument, so the "_BY_THREAD" and
469   "_BY_EVENT_NAME" aggregates are different dimensions,
470   operating on different x_i and x_j coordinates.
471   These aggregates are "orthogonal".
472 
473   @section PROJECTION Concept of projection
474 
475   A given x_i attribute value can convey either just one basic information,
476   such as a number of bytes, or can convey implied information,
477   such as an object fully qualified name.
478 
479   For example, from the value "test.t1", the name of the object schema
480   "test" can be separated from the object name "t1", so that now aggregates
481   by object schema can be implemented.
482 
483   In math terms, that corresponds to defining a function:
484   F_i (x): x --> y
485   Applying this function to our point P gives another point P':
486 
487   F_i (P):
488   P(x1, x2, ..., x{i-1}, x_i, x{i+1}, ..., x_N)
489   --> P' (x1, x2, ..., x{i-1}, F_i(x_i), x{i+1}, ..., x_N)
490 
491   That function defines in fact an aggregate !
492   In SQL terms, this aggregate would look like the following table:
493 
494 @verbatim
495   CREATE VIEW EVENTS_WAITS_SUMMARY_BY_Func_i AS
496   SELECT col_1, col_2, ..., col_{i-1},
497          Func_i(col_i),
498          COUNT(col_i),
499          MIN(col_i), AVG(col_i), MAX(col_i), -- if col_i is a numeric value
500          col_{i+1}, ..., col_N
501          FROM EVENTS_WAITS_HISTORY_INFINITE
502          group by col_1, col_2, ..., col_{i-1}, col_{i+1}, ..., col_N.
503 @endverbatim
504 
505   Note that not all columns have to be included,
506   in particular some columns that are dependent on the x_i column should
507   be removed, so that in practice, MySQL's aggregation method tends to
508   remove many attributes at each aggregation step.
509 
510   For example, when aggregating wait events by object instances,
511   - the wait_time and number_of_bytes can be summed,
512   and sum(wait_time) now becomes an object instance attribute.
513   - the source, timer_start, timer_end columns are not in the
514   _BY_INSTANCE table, because these attributes are only
515   meaningful for a wait.
516 
517   @section COMPOSITION Concept of composition
518 
519   Now, the "test.t1" --> "test" example was purely theory,
520   just to explain the concept, and does not lead very far.
521   Let's look at a more interesting example of data that can be derived
522   from the raw event.
523 
524   An event creates a transient object, PFS_wait_locker, per operation.
525   This object's life cycle is extremely short: it's created just
526   before the start_wait() instrumentation call, and is destroyed in
527   the end_wait() call.
528 
529   The wait locker itself contains a pointer to the object instance
530   waited on.
531   That makes it possible to implement a wait_locker --> object instance projection,
532   with m_target.
533   The object instance life cycle depends on _init and _destroy calls
534   from the code, such as mysql_mutex_init()
535   and mysql_mutex_destroy() for a mutex.
536 
537   The object instance waited on contains a pointer to the object class,
538   which is represented by the instrument name.
539   That makes it possible to implement an object instance --> object class projection.
540   The object class life cycle is permanent, as instruments are loaded in
541   the server and never removed.
542 
543   The object class is named in such a way
544   (for example, "wait/sync/mutex/sql/LOCK_open",
545   "wait/io/file/maria/data_file") that the component ("sql", "maria")
546   that it belongs to can be inferred.
547   That makes it possible to implement an object class --> server component projection.
548 
549   Back to math again, we have, for example for mutexes:
550 
551   F1 (l) : PFS_wait_locker l --> PFS_mutex m = l->m_target.m_mutex
552 
553   F1_to_2 (m) : PFS_mutex m --> PFS_mutex_class i = m->m_class
554 
555   F2_to_3 (i) : PFS_mutex_class i --> const char *component =
556                                         substring(i->m_name, ...)
557 
558   Per components aggregates are not implemented, this is just an illustration.
559 
560   F1 alone defines this aggregate:
561 
562   EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_INSTANCE
563   (or MUTEX_INSTANCE)
564 
565   F1_to_2 alone could define this aggregate:
566 
567   EVENTS_WAITS_SUMMARY_BY_INSTANCE --> EVENTS_WAITS_SUMMARY_BY_EVENT_NAME
568 
569   Alternatively, using function composition, with
570   F2 = F1_to_2 o F1, F2 defines:
571 
572   EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_EVENT_NAME
573 
574   Likewise, F2_to_3 defines:
575 
576   EVENTS_WAITS_SUMMARY_BY_EVENT_NAME --> EVENTS_WAITS_SUMMARY_BY_COMPONENT
577 
578   and F3 = F2_to_3 o F1_to_2 o F1 defines:
579 
580   EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_COMPONENT
581 
582   What has all this to do with the code ?
583 
584   Functions (or aggregates) such as F3 are not implemented as is.
585   Instead, they are decomposed into F2_to_3 o F1_to_2 o F1,
586   and each intermediate aggregate is stored into an internal buffer.
587   This makes it possible to support all of the F1, F2, F3 aggregates from shared
588   internal buffers, where computation already performed to compute F2
589   is reused when computing F3.
590 
591   @section OBJECT_GRAPH Object graph
592 
593   In terms of object instances, or records, pointers between
594   different buffers define an object instance graph.
595 
596   For example, assuming the following scenario:
597   - A mutex class "M" is instrumented, the instrument name
598   is "wait/sync/mutex/sql/M"
599   - This mutex instrument has been instantiated twice,
600   mutex instances are noted M-1 and M-2
601   - Threads T-A and T-B are locking mutex instance M-1
602   - Threads T-C and T-D are locking mutex instance M-2
603 
604   The performance schema will record the following data:
605   - EVENTS_WAITS_CURRENT has 4 rows, one for each mutex locker
606   - EVENTS_WAITS_SUMMARY_BY_INSTANCE shows 2 rows, for M-1 and M-2
607   - EVENTS_WAITS_SUMMARY_BY_EVENT_NAME shows 1 row, for M
608 
609   The graph of structures will look like:
610 
611 @verbatim
612   PFS_wait_locker (T-A, M-1) ----------
613                                       |
614                                       v
615                                  PFS_mutex (M-1)
616                                  - m_wait_stat    ------------
617                                       ^                      |
618                                       |                      |
619   PFS_wait_locker (T-B, M-1) ----------                      |
620                                                              v
621                                                         PFS_mutex_class (M)
622                                                         - m_wait_stat
623   PFS_wait_locker (T-C, M-2) ----------                      ^
624                                       |                      |
625                                       v                      |
626                                  PFS_mutex (M-2)             |
627                                  - m_wait_stat    ------------
628                                       ^
629                                       |
630   PFS_wait_locker (T-D, M-2) ----------
631 
632             ||                        ||                     ||
633             ||                        ||                     ||
634             vv                        vv                     vv
635 
636   EVENTS_WAITS_CURRENT ..._SUMMARY_BY_INSTANCE ..._SUMMARY_BY_EVENT_NAME
637 @endverbatim
638 
639   @section ON_THE_FLY On the fly aggregates
640 
641   'On the fly' aggregates are computed during the code execution.
642   This is necessary because the data the aggregate is based on is volatile,
643   and can not be kept indefinitely.
644 
645   With on the fly aggregates:
646   - the writer thread does all the computation
647   - the reader thread accesses the result directly
648 
649   This model is to be avoided if possible, due to the overhead
650   caused when instrumenting code.
651 
652   @section HIGHER_LEVEL Higher level aggregates
653 
654   'Higher level' aggregates are implemented on demand only.
655   The code executing a SELECT from the aggregate table is
656   collecting data from multiple internal buffers to produce the result.
657 
658   With higher level aggregates:
659   - the reader thread does all the computation
660   - the writer thread has no overhead.
661 
662   @section MIXED Mixed level aggregates
663 
664   The 'Mixed' model is a compromise between 'On the fly' and 'Higher level'
665   aggregates, for internal buffers that are not permanent.
666 
667   While an object is present in a buffer, the higher level model is used.
668   When an object is about to be destroyed, statistics are saved into
669   a 'parent' buffer with a longer life cycle, to follow the on the fly model.
670 
671   With mixed aggregates:
672   - the reader thread does a lot of complex computation,
673   - the writer thread has minimal overhead, on destroy events.
674 
675   @section IMPL_WAIT Implementation for waits aggregates
676 
677   For waits, the tables that contain aggregated wait data are:
678   - EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
679   - EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME
680   - EVENTS_WAITS_SUMMARY_BY_INSTANCE
681   - EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
682   - EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME
683   - EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME
684   - FILE_SUMMARY_BY_EVENT_NAME
685   - FILE_SUMMARY_BY_INSTANCE
686   - SOCKET_SUMMARY_BY_INSTANCE
687   - SOCKET_SUMMARY_BY_EVENT_NAME
688   - OBJECTS_SUMMARY_GLOBAL_BY_TYPE
689 
690   The instrumented code that generates wait events consists of:
691   - mutexes (mysql_mutex_t)
692   - rwlocks (mysql_rwlock_t)
693   - conditions (mysql_cond_t)
694   - file io (MYSQL_FILE)
695   - socket io (MYSQL_SOCKET)
696   - table io
697   - table lock
698   - idle
699 
700   The flow of data between aggregates tables varies for each instrumentation.
701 
702   @subsection IMPL_WAIT_MUTEX Mutex waits
703 
704 @verbatim
705   mutex_locker(T, M)
706    |
707    | [1]
708    |
709    |-> pfs_mutex(M)                           =====>> [B], [C]
710    |    |
711    |    | [2]
712    |    |
713    |    |-> pfs_mutex_class(M.class)          =====>> [C]
714    |
715    |-> pfs_thread(T).event_name(M)            =====>> [A], [D], [E], [F]
716         |
717         | [3]
718         |
719      3a |-> pfs_account(U, H).event_name(M)   =====>> [D], [E], [F]
720         .    |
721         .    | [4-RESET]
722         .    |
723      3b .....+-> pfs_user(U).event_name(M)    =====>> [E]
724         .    |
725      3c .....+-> pfs_host(H).event_name(M)    =====>> [F]
726 @endverbatim
727 
728   How to read this diagram:
729   - events that occur during the instrumented code execution are noted with numbers,
730   as in [1]. Code executed by these events has an impact on overhead.
731   - events that occur during TRUNCATE TABLE operations are noted with numbers,
732   followed by "-RESET", as in [4-RESET].
733   Code executed by these events has no impact on overhead,
734   since they are executed by independent monitoring sessions.
735   - events that occur when a reader extracts data from a performance schema table
736   are noted with letters, as in [A]. The name of the table involved,
737   and the method that builds a row are documented. Code executed by these events
738   has no impact on the instrumentation overhead. Note that the table
739   implementation may pull data from different buffers.
740   - nominal code paths are in plain lines. A "nominal" code path corresponds to
741   cases where the performance schema buffers are sized so that no records are lost.
742   - degenerated code paths are in dotted lines. A "degenerated" code path corresponds
743   to edge cases where parent buffers are full, which forces the code to aggregate to
744   grand parents directly.
745 
746   Implemented as:
747   - [1] @c start_mutex_wait_v1(), @c end_mutex_wait_v1()
748   - [2] @c destroy_mutex_v1()
749   - [3] @c aggregate_thread_waits()
750   - [4-RESET] @c PFS_account::aggregate_waits()
751   - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
752         @c table_ews_by_thread_by_event_name::make_row()
753   - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
754         @c table_events_waits_summary_by_instance::make_mutex_row()
755   - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
756         @c table_ews_global_by_event_name::make_mutex_row()
757   - [D] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
758         @c table_ews_by_account_by_event_name::make_row()
759   - [E] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
760         @c table_ews_by_user_by_event_name::make_row()
761   - [F] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
762         @c table_ews_by_host_by_event_name::make_row()
763 
  Table EVENTS_WAITS_SUMMARY_BY_INSTANCE is an 'on the fly' aggregate,
  because the data is collected on the fly by [1] and stored into a buffer,
  pfs_mutex. The table implementation [B] simply reads the results directly
  from this buffer.
768 
  Table EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME is a 'mixed' aggregate,
  because some data is collected on the fly [1],
  some data is preserved with [2] at a later time in the life cycle,
  and two different buffers pfs_mutex and pfs_mutex_class are used to store the
  statistics collected. The table implementation [C] is more complex, since
  it reads from two buffers pfs_mutex and pfs_mutex_class.
775 
776   @subsection IMPL_WAIT_RWLOCK Rwlock waits
777 
778 @verbatim
779   rwlock_locker(T, R)
780    |
781    | [1]
782    |
783    |-> pfs_rwlock(R)                          =====>> [B], [C]
784    |    |
785    |    | [2]
786    |    |
787    |    |-> pfs_rwlock_class(R.class)         =====>> [C]
788    |
789    |-> pfs_thread(T).event_name(R)            =====>> [A]
790         |
791        ...
792 @endverbatim
793 
794   Implemented as:
795   - [1] @c start_rwlock_rdwait_v1(), @c end_rwlock_rdwait_v1(), ...
796   - [2] @c destroy_rwlock_v1()
797   - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
798         @c table_ews_by_thread_by_event_name::make_row()
799   - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
800         @c table_events_waits_summary_by_instance::make_rwlock_row()
801   - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
802         @c table_ews_global_by_event_name::make_rwlock_row()
803 
804   @subsection IMPL_WAIT_COND Cond waits
805 
806 @verbatim
807   cond_locker(T, C)
808    |
809    | [1]
810    |
811    |-> pfs_cond(C)                            =====>> [B], [C]
812    |    |
813    |    | [2]
814    |    |
815    |    |-> pfs_cond_class(C.class)           =====>> [C]
816    |
817    |-> pfs_thread(T).event_name(C)            =====>> [A]
818         |
819        ...
820 @endverbatim
821 
822   Implemented as:
823   - [1] @c start_cond_wait_v1(), @c end_cond_wait_v1()
824   - [2] @c destroy_cond_v1()
825   - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
826         @c table_ews_by_thread_by_event_name::make_row()
827   - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
828         @c table_events_waits_summary_by_instance::make_cond_row()
829   - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
830         @c table_ews_global_by_event_name::make_cond_row()
831 
832   @subsection IMPL_WAIT_FILE File waits
833 
834 @verbatim
835   file_locker(T, F)
836    |
837    | [1]
838    |
839    |-> pfs_file(F)                            =====>> [B], [C], [D], [E]
840    |    |
841    |    | [2]
842    |    |
843    |    |-> pfs_file_class(F.class)           =====>> [C], [D]
844    |
845    |-> pfs_thread(T).event_name(F)            =====>> [A]
846         |
847        ...
848 @endverbatim
849 
850   Implemented as:
851   - [1] @c get_thread_file_name_locker_v1(), @c start_file_wait_v1(),
852         @c end_file_wait_v1(), ...
853   - [2] @c close_file_v1()
854   - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
855         @c table_ews_by_thread_by_event_name::make_row()
856   - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
857         @c table_events_waits_summary_by_instance::make_file_row()
858   - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
859         @c table_ews_global_by_event_name::make_file_row()
860   - [D] FILE_SUMMARY_BY_EVENT_NAME,
861         @c table_file_summary_by_event_name::make_row()
862   - [E] FILE_SUMMARY_BY_INSTANCE,
863         @c table_file_summary_by_instance::make_row()
864 
865   @subsection IMPL_WAIT_SOCKET Socket waits
866 
867 @verbatim
868   socket_locker(T, S)
869    |
870    | [1]
871    |
872    |-> pfs_socket(S)                            =====>> [A], [B], [C], [D], [E]
873         |
874         | [2]
875         |
876         |-> pfs_socket_class(S.class)           =====>> [C], [D]
877         |
878         |-> pfs_thread(T).event_name(S)         =====>> [A]
879         |
880         | [3]
881         |
882      3a |-> pfs_account(U, H).event_name(S)     =====>> [F], [G], [H]
883         .    |
884         .    | [4-RESET]
885         .    |
886      3b .....+-> pfs_user(U).event_name(S)      =====>> [G]
887         .    |
888      3c .....+-> pfs_host(H).event_name(S)      =====>> [H]
889 @endverbatim
890 
891   Implemented as:
892   - [1] @c start_socket_wait_v1(), @c end_socket_wait_v1().
893   - [2] @c close_socket_v1()
894   - [3] @c aggregate_thread_waits()
895   - [4] @c PFS_account::aggregate_waits()
896   - [5] @c PFS_host::aggregate_waits()
897   - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
898         @c table_ews_by_thread_by_event_name::make_row()
899   - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
900         @c table_events_waits_summary_by_instance::make_socket_row()
901   - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
902         @c table_ews_global_by_event_name::make_socket_row()
903   - [D] SOCKET_SUMMARY_BY_EVENT_NAME,
904         @c table_socket_summary_by_event_name::make_row()
905   - [E] SOCKET_SUMMARY_BY_INSTANCE,
906         @c table_socket_summary_by_instance::make_row()
907   - [F] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
908         @c table_ews_by_account_by_event_name::make_row()
909   - [G] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
910         @c table_ews_by_user_by_event_name::make_row()
911   - [H] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
912         @c table_ews_by_host_by_event_name::make_row()
913 
914   @subsection IMPL_WAIT_TABLE Table waits
915 
916 @verbatim
917   table_locker(Thread Th, Table Tb, Event = io or lock)
918    |
919    | [1]
920    |
921 1a |-> pfs_table(Tb)                          =====>> [A], [B], [C]
922    |    |
923    |    | [2]
924    |    |
925    |    |-> pfs_table_share(Tb.share)         =====>> [B], [C]
926    |         |
927    |         | [3]
928    |         |
929    |         |-> global_table_io_stat         =====>> [C]
930    |         |
931    |         |-> global_table_lock_stat       =====>> [C]
932    |
933 1b |-> pfs_thread(Th).event_name(E)           =====>> [D], [E], [F], [G]
934    |    |
   |    | [4-RESET]
936    |    |
937    |    |-> pfs_account(U, H).event_name(E)   =====>> [E], [F], [G]
938    |    .    |
939    |    .    | [5-RESET]
940    |    .    |
941    |    .....+-> pfs_user(U).event_name(E)    =====>> [F]
942    |    .    |
943    |    .....+-> pfs_host(H).event_name(E)    =====>> [G]
944    |
945 1c |-> pfs_thread(Th).waits_current(W)        =====>> [H]
946    |
947 1d |-> pfs_thread(Th).waits_history(W)        =====>> [I]
948    |
949 1e |-> waits_history_long(W)                  =====>> [J]
950 @endverbatim
951 
952   Implemented as:
953   - [1] @c start_table_io_wait_v1(), @c end_table_io_wait_v1()
954   - [2] @c close_table_v1()
955   - [3] @c drop_table_share_v1()
956   - [4] @c TRUNCATE TABLE EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
957   - [5] @c TRUNCATE TABLE EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
958   - [A] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
959         @c table_events_waits_summary_by_instance::make_table_row()
960   - [B] OBJECTS_SUMMARY_GLOBAL_BY_TYPE,
961         @c table_os_global_by_type::make_row()
962   - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
963         @c table_ews_global_by_event_name::make_table_io_row(),
964         @c table_ews_global_by_event_name::make_table_lock_row()
965   - [D] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
966         @c table_ews_by_thread_by_event_name::make_row()
967   - [E] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
        @c table_ews_by_account_by_event_name::make_row()
969   - [F] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
970         @c table_ews_by_user_by_event_name::make_row()
971   - [G] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
972         @c table_ews_by_host_by_event_name::make_row()
973   - [H] EVENTS_WAITS_CURRENT,
974         @c table_events_waits_common::make_row()
975   - [I] EVENTS_WAITS_HISTORY,
976         @c table_events_waits_common::make_row()
977   - [J] EVENTS_WAITS_HISTORY_LONG,
978         @c table_events_waits_common::make_row()
979 
980   @section IMPL_STAGE Implementation for stages aggregates
981 
  For stages, the tables that contain aggregated data are:
983   - EVENTS_STAGES_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
984   - EVENTS_STAGES_SUMMARY_BY_HOST_BY_EVENT_NAME
985   - EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME
986   - EVENTS_STAGES_SUMMARY_BY_USER_BY_EVENT_NAME
987   - EVENTS_STAGES_SUMMARY_GLOBAL_BY_EVENT_NAME
988 
989 @verbatim
990   start_stage(T, S)
991    |
992    | [1]
993    |
994 1a |-> pfs_thread(T).event_name(S)            =====>> [A], [B], [C], [D], [E]
995    |    |
996    |    | [2]
997    |    |
998    | 2a |-> pfs_account(U, H).event_name(S)   =====>> [B], [C], [D], [E]
999    |    .    |
1000    |    .    | [3-RESET]
1001    |    .    |
1002    | 2b .....+-> pfs_user(U).event_name(S)    =====>> [C]
1003    |    .    |
1004    | 2c .....+-> pfs_host(H).event_name(S)    =====>> [D], [E]
1005    |    .    .    |
1006    |    .    .    | [4-RESET]
1007    | 2d .    .    |
1008 1b |----+----+----+-> pfs_stage_class(S)      =====>> [E]
1009 
1010 @endverbatim
1011 
1012   Implemented as:
1013   - [1] @c start_stage_v1()
1014   - [2] @c delete_thread_v1(), @c aggregate_thread_stages()
1015   - [3] @c PFS_account::aggregate_stages()
1016   - [4] @c PFS_host::aggregate_stages()
1017   - [A] EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME,
1018         @c table_esgs_by_thread_by_event_name::make_row()
1019   - [B] EVENTS_STAGES_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
1020         @c table_esgs_by_account_by_event_name::make_row()
1021   - [C] EVENTS_STAGES_SUMMARY_BY_USER_BY_EVENT_NAME,
1022         @c table_esgs_by_user_by_event_name::make_row()
1023   - [D] EVENTS_STAGES_SUMMARY_BY_HOST_BY_EVENT_NAME,
1024         @c table_esgs_by_host_by_event_name::make_row()
1025   - [E] EVENTS_STAGES_SUMMARY_GLOBAL_BY_EVENT_NAME,
1026         @c table_esgs_global_by_event_name::make_row()
1027 
1028 @section IMPL_STATEMENT Implementation for statements consumers
1029 
  For statements, the tables that contain individual event data are:
1031   - EVENTS_STATEMENTS_CURRENT
1032   - EVENTS_STATEMENTS_HISTORY
1033   - EVENTS_STATEMENTS_HISTORY_LONG
1034 
  For statements, the tables that contain aggregated data are:
1036   - EVENTS_STATEMENTS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
1037   - EVENTS_STATEMENTS_SUMMARY_BY_HOST_BY_EVENT_NAME
1038   - EVENTS_STATEMENTS_SUMMARY_BY_THREAD_BY_EVENT_NAME
1039   - EVENTS_STATEMENTS_SUMMARY_BY_USER_BY_EVENT_NAME
1040   - EVENTS_STATEMENTS_SUMMARY_GLOBAL_BY_EVENT_NAME
1041   - EVENTS_STATEMENTS_SUMMARY_BY_DIGEST
1042 
1043 @verbatim
1044   statement_locker(T, S)
1045    |
1046    | [1]
1047    |
1048 1a |-> pfs_thread(T).event_name(S)            =====>> [A], [B], [C], [D], [E]
1049    |    |
1050    |    | [2]
1051    |    |
1052    | 2a |-> pfs_account(U, H).event_name(S)   =====>> [B], [C], [D], [E]
1053    |    .    |
1054    |    .    | [3-RESET]
1055    |    .    |
1056    | 2b .....+-> pfs_user(U).event_name(S)    =====>> [C]
1057    |    .    |
1058    | 2c .....+-> pfs_host(H).event_name(S)    =====>> [D], [E]
1059    |    .    .    |
1060    |    .    .    | [4-RESET]
1061    | 2d .    .    |
1062 1b |----+----+----+-> pfs_statement_class(S)  =====>> [E]
1063    |
1064 1c |-> pfs_thread(T).statement_current(S)     =====>> [F]
1065    |
1066 1d |-> pfs_thread(T).statement_history(S)     =====>> [G]
1067    |
1068 1e |-> statement_history_long(S)              =====>> [H]
1069    |
1070 1f |-> statement_digest(S)                    =====>> [I]
1071 
1072 @endverbatim
1073 
1074   Implemented as:
  - [1] @c start_statement_v1(), @c end_statement_v1()
        (1a, 1b) is an aggregation by EVENT_NAME,
        (1c, 1d, 1e) is an aggregation by TIME,
        (1f) is an aggregation by DIGEST;
        all of these are orthogonal,
        and implemented in end_statement_v1().
1081   - [2] @c delete_thread_v1(), @c aggregate_thread_statements()
1082   - [3] @c PFS_account::aggregate_statements()
1083   - [4] @c PFS_host::aggregate_statements()
1084   - [A] EVENTS_STATEMENTS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
1085         @c table_esms_by_thread_by_event_name::make_row()
1086   - [B] EVENTS_STATEMENTS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
1087         @c table_esms_by_account_by_event_name::make_row()
1088   - [C] EVENTS_STATEMENTS_SUMMARY_BY_USER_BY_EVENT_NAME,
1089         @c table_esms_by_user_by_event_name::make_row()
1090   - [D] EVENTS_STATEMENTS_SUMMARY_BY_HOST_BY_EVENT_NAME,
1091         @c table_esms_by_host_by_event_name::make_row()
1092   - [E] EVENTS_STATEMENTS_SUMMARY_GLOBAL_BY_EVENT_NAME,
1093         @c table_esms_global_by_event_name::make_row()
1094   - [F] EVENTS_STATEMENTS_CURRENT,
1095         @c table_events_statements_current::rnd_next(),
1096         @c table_events_statements_common::make_row()
1097   - [G] EVENTS_STATEMENTS_HISTORY,
1098         @c table_events_statements_history::rnd_next(),
1099         @c table_events_statements_common::make_row()
1100   - [H] EVENTS_STATEMENTS_HISTORY_LONG,
1101         @c table_events_statements_history_long::rnd_next(),
1102         @c table_events_statements_common::make_row()
  - [I] EVENTS_STATEMENTS_SUMMARY_BY_DIGEST,
1104         @c table_esms_by_digest::make_row()
1105 */
1106 
1107 /**
1108   @defgroup Performance_schema Performance Schema
1109   The performance schema component.
1110   For details, see the
1111   @ref PAGE_PERFORMANCE_SCHEMA "performance schema main page".
1112 
1113   @defgroup Performance_schema_implementation Performance Schema Implementation
1114   @ingroup Performance_schema
1115 
1116   @defgroup Performance_schema_tables Performance Schema Tables
1117   @ingroup Performance_schema_implementation
1118 */
1119 
1120 pthread_key(PFS_thread*, THR_PFS);
1121 bool THR_PFS_initialized= false;
1122 
1123 /**
1124   Conversion map from PSI_mutex_operation to enum_operation_type.
1125   Indexed by enum PSI_mutex_operation.
1126 */
1127 static enum_operation_type mutex_operation_map[]=
1128 {
1129   OPERATION_TYPE_LOCK,
1130   OPERATION_TYPE_TRYLOCK
1131 };
1132 
1133 /**
1134   Conversion map from PSI_rwlock_operation to enum_operation_type.
1135   Indexed by enum PSI_rwlock_operation.
1136 */
1137 static enum_operation_type rwlock_operation_map[]=
1138 {
1139   OPERATION_TYPE_READLOCK,
1140   OPERATION_TYPE_WRITELOCK,
1141   OPERATION_TYPE_TRYREADLOCK,
1142   OPERATION_TYPE_TRYWRITELOCK
1143 };
1144 
1145 /**
1146   Conversion map from PSI_cond_operation to enum_operation_type.
1147   Indexed by enum PSI_cond_operation.
1148 */
1149 static enum_operation_type cond_operation_map[]=
1150 {
1151   OPERATION_TYPE_WAIT,
1152   OPERATION_TYPE_TIMEDWAIT
1153 };
1154 
1155 /**
1156   Conversion map from PSI_file_operation to enum_operation_type.
1157   Indexed by enum PSI_file_operation.
1158 */
1159 static enum_operation_type file_operation_map[]=
1160 {
1161   OPERATION_TYPE_FILECREATE,
1162   OPERATION_TYPE_FILECREATETMP,
1163   OPERATION_TYPE_FILEOPEN,
1164   OPERATION_TYPE_FILESTREAMOPEN,
1165   OPERATION_TYPE_FILECLOSE,
1166   OPERATION_TYPE_FILESTREAMCLOSE,
1167   OPERATION_TYPE_FILEREAD,
1168   OPERATION_TYPE_FILEWRITE,
1169   OPERATION_TYPE_FILESEEK,
1170   OPERATION_TYPE_FILETELL,
1171   OPERATION_TYPE_FILEFLUSH,
1172   OPERATION_TYPE_FILESTAT,
1173   OPERATION_TYPE_FILEFSTAT,
1174   OPERATION_TYPE_FILECHSIZE,
1175   OPERATION_TYPE_FILEDELETE,
1176   OPERATION_TYPE_FILERENAME,
1177   OPERATION_TYPE_FILESYNC
1178 };
1179 
1180 /**
1181   Conversion map from PSI_table_operation to enum_operation_type.
1182   Indexed by enum PSI_table_io_operation.
1183 */
1184 static enum_operation_type table_io_operation_map[]=
1185 {
1186   OPERATION_TYPE_TABLE_FETCH,
1187   OPERATION_TYPE_TABLE_WRITE_ROW,
1188   OPERATION_TYPE_TABLE_UPDATE_ROW,
1189   OPERATION_TYPE_TABLE_DELETE_ROW
1190 };
1191 
1192 /**
1193   Conversion map from enum PFS_TL_LOCK_TYPE to enum_operation_type.
1194   Indexed by enum PFS_TL_LOCK_TYPE.
1195 */
1196 static enum_operation_type table_lock_operation_map[]=
1197 {
1198   OPERATION_TYPE_TL_READ_NORMAL, /* PFS_TL_READ */
1199   OPERATION_TYPE_TL_READ_WITH_SHARED_LOCKS, /* PFS_TL_READ_WITH_SHARED_LOCKS */
1200   OPERATION_TYPE_TL_READ_HIGH_PRIORITY, /* PFS_TL_READ_HIGH_PRIORITY */
1201   OPERATION_TYPE_TL_READ_NO_INSERTS, /* PFS_TL_READ_NO_INSERT */
1202   OPERATION_TYPE_TL_WRITE_ALLOW_WRITE, /* PFS_TL_WRITE_ALLOW_WRITE */
1203   OPERATION_TYPE_TL_WRITE_CONCURRENT_INSERT, /* PFS_TL_WRITE_CONCURRENT_INSERT */
1204   OPERATION_TYPE_TL_WRITE_DELAYED, /* PFS_TL_WRITE_DELAYED */
1205   OPERATION_TYPE_TL_WRITE_LOW_PRIORITY, /* PFS_TL_WRITE_LOW_PRIORITY */
1206   OPERATION_TYPE_TL_WRITE_NORMAL, /* PFS_TL_WRITE */
1207   OPERATION_TYPE_TL_READ_EXTERNAL, /* PFS_TL_READ_EXTERNAL */
1208   OPERATION_TYPE_TL_WRITE_EXTERNAL /* PFS_TL_WRITE_EXTERNAL */
1209 };
1210 
1211 /**
1212   Conversion map from PSI_socket_operation to enum_operation_type.
1213   Indexed by enum PSI_socket_operation.
1214 */
1215 static enum_operation_type socket_operation_map[]=
1216 {
1217   OPERATION_TYPE_SOCKETCREATE,
1218   OPERATION_TYPE_SOCKETCONNECT,
1219   OPERATION_TYPE_SOCKETBIND,
1220   OPERATION_TYPE_SOCKETCLOSE,
1221   OPERATION_TYPE_SOCKETSEND,
1222   OPERATION_TYPE_SOCKETRECV,
1223   OPERATION_TYPE_SOCKETSENDTO,
1224   OPERATION_TYPE_SOCKETRECVFROM,
1225   OPERATION_TYPE_SOCKETSENDMSG,
1226   OPERATION_TYPE_SOCKETRECVMSG,
1227   OPERATION_TYPE_SOCKETSEEK,
1228   OPERATION_TYPE_SOCKETOPT,
1229   OPERATION_TYPE_SOCKETSTAT,
1230   OPERATION_TYPE_SOCKETSHUTDOWN,
1231   OPERATION_TYPE_SOCKETSELECT
1232 };
1233 
1234 /**
1235   Build the prefix name of a class of instruments in a category.
1236   For example, this function builds the string 'wait/sync/mutex/sql/' from
1237   a prefix 'wait/sync/mutex' and a category 'sql'.
1238   This prefix is used later to build each instrument name, such as
1239   'wait/sync/mutex/sql/LOCK_open'.
1240   @param prefix               Prefix for this class of instruments
1241   @param category             Category name
1242   @param [out] output         Buffer of length PFS_MAX_INFO_NAME_LENGTH.
1243   @param [out] output_length  Length of the resulting output string.
1244   @return 0 for success, non zero for errors
1245 */
build_prefix(const LEX_CSTRING * prefix,const char * category,char * output,int * output_length)1246 static int build_prefix(const LEX_CSTRING *prefix, const char *category,
1247                         char *output, int *output_length)
1248 {
1249   size_t len= strlen(category);
1250   char *out_ptr= output;
1251   size_t prefix_length= prefix->length;
1252 
1253   if (unlikely((prefix_length + len + 1) >=
1254                PFS_MAX_FULL_PREFIX_NAME_LENGTH))
1255   {
1256     pfs_print_error("build_prefix: prefix+category is too long <%s> <%s>\n",
1257                     prefix->str, category);
1258     return 1;
1259   }
1260 
1261   if (unlikely(strchr(category, '/') != NULL))
1262   {
1263     pfs_print_error("build_prefix: invalid category <%s>\n",
1264                     category);
1265     return 1;
1266   }
1267 
1268   /* output = prefix + category + '/' */
1269   memcpy(out_ptr, prefix->str, prefix_length);
1270   out_ptr+= prefix_length;
1271   memcpy(out_ptr, category, len);
1272   out_ptr+= len;
1273   *out_ptr= '/';
1274   out_ptr++;
1275   *output_length= (int)(out_ptr - output);
1276 
1277   return 0;
1278 }
1279 
/*
  Complete body of a 'void register_xxx_v1(category, info, count)' function.
  The expansion relies on the enclosing function's parameters 'category',
  'info' and 'count', and ends with a 'return' statement.
  KEY_T          type of the key written to *(info->m_key)
  PREFIX         LEX_CSTRING prefix handed to build_prefix()
  REGISTER_FUNC  function called as (name, name length, flags),
                 its result is used as the key
  Every element of 'info' gets a key: 0 when the prefix can not be built
  or when the full name is longer than PFS_MAX_INFO_NAME_LENGTH,
  the result of REGISTER_FUNC otherwise.
*/
#define REGISTER_BODY_V1(KEY_T, PREFIX, REGISTER_FUNC)                \
  KEY_T new_key;                                                      \
  char name_buf[PFS_MAX_INFO_NAME_LENGTH];                            \
  int prefix_len;                                                     \
  int name_len;                                                       \
  int total_len;                                                      \
                                                                      \
  DBUG_ASSERT(category != NULL);                                      \
  DBUG_ASSERT(info != NULL);                                          \
  if (unlikely(build_prefix(&PREFIX, category,                        \
                            name_buf, &prefix_len)))                  \
  {                                                                   \
    /* No usable prefix, flag every instrument with key 0. */         \
    while (count > 0)                                                 \
    {                                                                 \
      *(info->m_key)= 0;                                              \
      count--;                                                        \
      info++;                                                         \
    }                                                                 \
    return;                                                           \
  }                                                                   \
                                                                      \
  while (count > 0)                                                   \
  {                                                                   \
    DBUG_ASSERT(info->m_key != NULL);                                 \
    DBUG_ASSERT(info->m_name != NULL);                                \
    name_len= (int)strlen(info->m_name);                              \
    total_len= prefix_len + name_len;                                 \
    if (unlikely(total_len > PFS_MAX_INFO_NAME_LENGTH))               \
    {                                                                 \
      pfs_print_error("REGISTER_BODY_V1: name too long <%s> <%s>\n",  \
                      category, info->m_name);                        \
      new_key= 0;                                                     \
    }                                                                 \
    else                                                              \
    {                                                                 \
      /* Append the instrument name right after the prefix. */        \
      memcpy(name_buf + prefix_len, info->m_name, name_len);          \
      new_key= REGISTER_FUNC(name_buf, total_len, info->m_flags);     \
    }                                                                 \
                                                                      \
    *(info->m_key)= new_key;                                          \
    count--;                                                          \
    info++;                                                           \
  }                                                                   \
  return;
1318 
1319 /* Use C linkage for the interface functions. */
1320 
1321 C_MODE_START
1322 
1323 /**
1324   Implementation of the mutex instrumentation interface.
1325   @sa PSI_v1::register_mutex.
1326 */
register_mutex_v1(const char * category,PSI_mutex_info_v1 * info,int count)1327 static void register_mutex_v1(const char *category,
1328                               PSI_mutex_info_v1 *info,
1329                               int count)
1330 {
1331   REGISTER_BODY_V1(PSI_mutex_key,
1332                    mutex_instrument_prefix,
1333                    register_mutex_class)
1334 }
1335 
1336 /**
1337   Implementation of the rwlock instrumentation interface.
1338   @sa PSI_v1::register_rwlock.
1339 */
register_rwlock_v1(const char * category,PSI_rwlock_info_v1 * info,int count)1340 static void register_rwlock_v1(const char *category,
1341                                PSI_rwlock_info_v1 *info,
1342                                int count)
1343 {
1344   REGISTER_BODY_V1(PSI_rwlock_key,
1345                    rwlock_instrument_prefix,
1346                    register_rwlock_class)
1347 }
1348 
1349 /**
1350   Implementation of the cond instrumentation interface.
1351   @sa PSI_v1::register_cond.
1352 */
register_cond_v1(const char * category,PSI_cond_info_v1 * info,int count)1353 static void register_cond_v1(const char *category,
1354                              PSI_cond_info_v1 *info,
1355                              int count)
1356 {
1357   REGISTER_BODY_V1(PSI_cond_key,
1358                    cond_instrument_prefix,
1359                    register_cond_class)
1360 }
1361 
1362 /**
1363   Implementation of the thread instrumentation interface.
1364   @sa PSI_v1::register_thread.
1365 */
register_thread_v1(const char * category,PSI_thread_info_v1 * info,int count)1366 static void register_thread_v1(const char *category,
1367                                PSI_thread_info_v1 *info,
1368                                int count)
1369 {
1370   REGISTER_BODY_V1(PSI_thread_key,
1371                    thread_instrument_prefix,
1372                    register_thread_class)
1373 }
1374 
1375 /**
1376   Implementation of the file instrumentation interface.
1377   @sa PSI_v1::register_file.
1378 */
register_file_v1(const char * category,PSI_file_info_v1 * info,int count)1379 static void register_file_v1(const char *category,
1380                              PSI_file_info_v1 *info,
1381                              int count)
1382 {
1383   REGISTER_BODY_V1(PSI_file_key,
1384                    file_instrument_prefix,
1385                    register_file_class)
1386 }
1387 
register_stage_v1(const char * category,PSI_stage_info_v1 ** info_array,int count)1388 static void register_stage_v1(const char *category,
1389                               PSI_stage_info_v1 **info_array,
1390                               int count)
1391 {
1392   char formatted_name[PFS_MAX_INFO_NAME_LENGTH];
1393   int prefix_length;
1394   int len;
1395   int full_length;
1396   PSI_stage_info_v1 *info;
1397 
1398   DBUG_ASSERT(category != NULL);
1399   DBUG_ASSERT(info_array != NULL);
1400   if (unlikely(build_prefix(&stage_instrument_prefix, category,
1401                formatted_name, &prefix_length)))
1402   {
1403     for (; count>0; count--, info_array++)
1404       (*info_array)->m_key= 0;
1405     return ;
1406   }
1407 
1408   for (; count>0; count--, info_array++)
1409   {
1410     info= *info_array;
1411     DBUG_ASSERT(info != NULL);
1412     DBUG_ASSERT(info->m_name != NULL);
1413     len= (int)strlen(info->m_name);
1414     full_length= prefix_length + len;
1415     if (likely(full_length <= PFS_MAX_INFO_NAME_LENGTH))
1416     {
1417       memcpy(formatted_name + prefix_length, info->m_name, len);
1418       info->m_key= register_stage_class(formatted_name,
1419                                         prefix_length,
1420                                         full_length,
1421                                         info->m_flags);
1422     }
1423     else
1424     {
1425       pfs_print_error("register_stage_v1: name too long <%s> <%s>\n",
1426                       category, info->m_name);
1427       info->m_key= 0;
1428     }
1429   }
1430   return;
1431 }
1432 
register_statement_v1(const char * category,PSI_statement_info_v1 * info,int count)1433 static void register_statement_v1(const char *category,
1434                                   PSI_statement_info_v1 *info,
1435                                   int count)
1436 {
1437   char formatted_name[PFS_MAX_INFO_NAME_LENGTH];
1438   int prefix_length;
1439   int len;
1440   int full_length;
1441 
1442   DBUG_ASSERT(category != NULL);
1443   DBUG_ASSERT(info != NULL);
1444   if (unlikely(build_prefix(&statement_instrument_prefix,
1445                             category, formatted_name, &prefix_length)))
1446   {
1447     for (; count>0; count--, info++)
1448       info->m_key= 0;
1449     return ;
1450   }
1451 
1452   for (; count>0; count--, info++)
1453   {
1454     if (info->m_name == NULL)
1455       continue;
1456 
1457     len= (int)strlen(info->m_name);
1458     full_length= prefix_length + len;
1459     if (likely(full_length <= PFS_MAX_INFO_NAME_LENGTH))
1460     {
1461       memcpy(formatted_name + prefix_length, info->m_name, len);
1462       info->m_key= register_statement_class(formatted_name, full_length, info->m_flags);
1463     }
1464     else
1465     {
1466       pfs_print_error("register_statement_v1: name too long <%s>\n",
1467                       info->m_name);
1468       info->m_key= 0;
1469     }
1470   }
1471   return;
1472 }
1473 
register_socket_v1(const char * category,PSI_socket_info_v1 * info,int count)1474 static void register_socket_v1(const char *category,
1475                              PSI_socket_info_v1 *info,
1476                              int count)
1477 {
1478   REGISTER_BODY_V1(PSI_socket_key,
1479                    socket_instrument_prefix,
1480                    register_socket_class)
1481 }
1482 
/*
  Complete body of an 'init_xxx_v1(key, identity)' function.
  T    instrument kind, pasted into PFS_T_class, PFS_T, PSI_T,
       find_T_class() and create_T()
  KEY  key given to find_T_class()
  ID   identity given to create_T()
  Returns NULL when no class is found for KEY or when the class is not
  enabled, otherwise the result of create_T() cast to PSI_T*.
  The expansion has no trailing ';', the caller supplies it.
*/
#define INIT_BODY_V1(T, KEY, ID)                                            \
  PFS_##T##_class *pfs_klass= find_##T##_class(KEY);                        \
  if (unlikely(pfs_klass == NULL) || ! pfs_klass->m_enabled)                \
    return NULL;                                                            \
  PFS_##T *pfs_object= create_##T(pfs_klass, ID);                           \
  return reinterpret_cast<PSI_##T *> (pfs_object)
1493 
1494 /**
1495   Implementation of the mutex instrumentation interface.
1496   @sa PSI_v1::init_mutex.
1497 */
1498 static PSI_mutex*
init_mutex_v1(PSI_mutex_key key,void * identity)1499 init_mutex_v1(PSI_mutex_key key, void *identity)
1500 {
1501   INIT_BODY_V1(mutex, key, identity);
1502 }
1503 
1504 /**
1505   Implementation of the mutex instrumentation interface.
1506   @sa PSI_v1::destroy_mutex.
1507 */
destroy_mutex_v1(PSI_mutex * mutex)1508 static void destroy_mutex_v1(PSI_mutex* mutex)
1509 {
1510   PFS_mutex *pfs= reinterpret_cast<PFS_mutex*> (mutex);
1511 
1512   DBUG_ASSERT(pfs != NULL);
1513 
1514   destroy_mutex(pfs);
1515 }
1516 
1517 /**
1518   Implementation of the rwlock instrumentation interface.
1519   @sa PSI_v1::init_rwlock.
1520 */
1521 static PSI_rwlock*
init_rwlock_v1(PSI_rwlock_key key,void * identity)1522 init_rwlock_v1(PSI_rwlock_key key, void *identity)
1523 {
1524   INIT_BODY_V1(rwlock, key, identity);
1525 }
1526 
1527 /**
1528   Implementation of the rwlock instrumentation interface.
1529   @sa PSI_v1::destroy_rwlock.
1530 */
destroy_rwlock_v1(PSI_rwlock * rwlock)1531 static void destroy_rwlock_v1(PSI_rwlock* rwlock)
1532 {
1533   PFS_rwlock *pfs= reinterpret_cast<PFS_rwlock*> (rwlock);
1534 
1535   DBUG_ASSERT(pfs != NULL);
1536 
1537   destroy_rwlock(pfs);
1538 }
1539 
1540 /**
1541   Implementation of the cond instrumentation interface.
1542   @sa PSI_v1::init_cond.
1543 */
1544 static PSI_cond*
init_cond_v1(PSI_cond_key key,void * identity)1545 init_cond_v1(PSI_cond_key key, void *identity)
1546 {
1547   INIT_BODY_V1(cond, key, identity);
1548 }
1549 
1550 /**
1551   Implementation of the cond instrumentation interface.
1552   @sa PSI_v1::destroy_cond.
1553 */
destroy_cond_v1(PSI_cond * cond)1554 static void destroy_cond_v1(PSI_cond* cond)
1555 {
1556   PFS_cond *pfs= reinterpret_cast<PFS_cond*> (cond);
1557 
1558   DBUG_ASSERT(pfs != NULL);
1559 
1560   destroy_cond(pfs);
1561 }
1562 
1563 /**
1564   Implementation of the table instrumentation interface.
1565   @sa PSI_v1::get_table_share.
1566 */
1567 static PSI_table_share*
get_table_share_v1(my_bool temporary,TABLE_SHARE * share)1568 get_table_share_v1(my_bool temporary, TABLE_SHARE *share)
1569 {
1570   /* Ignore temporary tables and views. */
1571   if (temporary || share->is_view)
1572     return NULL;
1573   /* An instrumented thread is required, for LF_PINS. */
1574   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1575   if (unlikely(pfs_thread == NULL))
1576     return NULL;
1577   PFS_table_share* pfs_share;
1578   pfs_share= find_or_create_table_share(pfs_thread, temporary, share);
1579   return reinterpret_cast<PSI_table_share*> (pfs_share);
1580 }
1581 
1582 /**
1583   Implementation of the table instrumentation interface.
1584   @sa PSI_v1::release_table_share.
1585 */
release_table_share_v1(PSI_table_share * share)1586 static void release_table_share_v1(PSI_table_share* share)
1587 {
1588   PFS_table_share* pfs= reinterpret_cast<PFS_table_share*> (share);
1589 
1590   if (unlikely(pfs == NULL))
1591     return;
1592 
1593   release_table_share(pfs);
1594 }
1595 
1596 /**
1597   Implementation of the table instrumentation interface.
1598   @sa PSI_v1::drop_table_share.
1599 */
1600 static void
drop_table_share_v1(my_bool temporary,const char * schema_name,int schema_name_length,const char * table_name,int table_name_length)1601 drop_table_share_v1(my_bool temporary,
1602                     const char *schema_name, int schema_name_length,
1603                     const char *table_name, int table_name_length)
1604 {
1605   /* Ignore temporary tables. */
1606   if (temporary)
1607     return;
1608   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1609   if (unlikely(pfs_thread == NULL))
1610     return;
1611   /* TODO: temporary tables */
1612   drop_table_share(pfs_thread, temporary, schema_name, schema_name_length,
1613                    table_name, table_name_length);
1614 }
1615 
1616 /**
1617   Implementation of the table instrumentation interface.
1618   @sa PSI_v1::open_table.
1619 */
1620 static PSI_table*
open_table_v1(PSI_table_share * share,const void * identity)1621 open_table_v1(PSI_table_share *share, const void *identity)
1622 {
1623   PFS_table_share *pfs_table_share= reinterpret_cast<PFS_table_share*> (share);
1624 
1625   /*
1626     When the performance schema is off, do not instrument anything.
1627     Table handles have short life cycle, instrumentation will happen
1628     again if needed during the next open().
1629   */
1630   if (psi_unlikely(! flag_global_instrumentation))
1631     return NULL;
1632 
1633   if (unlikely(pfs_table_share == NULL))
1634     return NULL;
1635 
1636   /* This object is not to be instrumented. */
1637   if (! pfs_table_share->m_enabled)
1638     return NULL;
1639 
1640   /* This object is instrumented, but all table instruments are disabled. */
1641   if (! global_table_io_class.m_enabled && ! global_table_lock_class.m_enabled)
1642     return NULL;
1643 
1644   PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1645   if (unlikely(thread == NULL))
1646     return NULL;
1647 
1648   PFS_table *pfs_table= create_table(pfs_table_share, thread, identity);
1649   return reinterpret_cast<PSI_table *> (pfs_table);
1650 }
1651 
1652 /**
1653   Implementation of the table instrumentation interface.
1654   @sa PSI_v1::unbind_table.
1655 */
unbind_table_v1(PSI_table * table)1656 static void unbind_table_v1(PSI_table *table)
1657 {
1658   PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
1659   if (likely(pfs != NULL))
1660   {
1661     pfs->m_thread_owner= NULL;
1662   }
1663 }
1664 
1665 /**
1666   Implementation of the table instrumentation interface.
1667   @sa PSI_v1::rebind_table.
1668 */
1669 static PSI_table *
rebind_table_v1(PSI_table_share * share,const void * identity,PSI_table * table)1670 rebind_table_v1(PSI_table_share *share, const void *identity, PSI_table *table)
1671 {
1672   PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
1673   if (likely(pfs != NULL))
1674   {
1675     PFS_thread *thread;
1676     DBUG_ASSERT(pfs->m_thread_owner == NULL);
1677 
1678     if (psi_unlikely(! flag_global_instrumentation))
1679     {
1680       destroy_table(pfs);
1681       return NULL;
1682     }
1683 
1684     /* The table handle was already instrumented, reuse it for this thread. */
1685     thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1686 
1687     if (unlikely(! pfs->m_share->m_enabled))
1688     {
1689       destroy_table(pfs);
1690       return NULL;
1691     }
1692 
1693     if (unlikely(! global_table_io_class.m_enabled && ! global_table_lock_class.m_enabled))
1694     {
1695       destroy_table(pfs);
1696       return NULL;
1697     }
1698 
1699     pfs->m_thread_owner= thread;
1700     return table;
1701   }
1702 
1703   if (psi_unlikely(! flag_global_instrumentation))
1704     return NULL;
1705 
1706   /* See open_table_v1() */
1707 
1708   PFS_table_share *pfs_table_share= reinterpret_cast<PFS_table_share*> (share);
1709 
1710   if (unlikely(pfs_table_share == NULL))
1711     return NULL;
1712 
1713   if (! pfs_table_share->m_enabled)
1714     return NULL;
1715 
1716   if (! global_table_io_class.m_enabled && ! global_table_lock_class.m_enabled)
1717     return NULL;
1718 
1719   PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1720   if (unlikely(thread == NULL))
1721     return NULL;
1722 
1723   PFS_table *pfs_table= create_table(pfs_table_share, thread, identity);
1724   return reinterpret_cast<PSI_table *> (pfs_table);
1725 }
1726 
1727 /**
1728   Implementation of the table instrumentation interface.
1729   @sa PSI_v1::close_table.
1730 */
close_table_v1(PSI_table * table)1731 static void close_table_v1(PSI_table *table)
1732 {
1733   PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
1734   if (unlikely(pfs == NULL))
1735     return;
1736   pfs->aggregate();
1737   destroy_table(pfs);
1738 }
1739 
1740 static PSI_socket*
init_socket_v1(PSI_socket_key key,const my_socket * fd,const struct sockaddr * addr,socklen_t addr_len)1741 init_socket_v1(PSI_socket_key key, const my_socket *fd,
1742                const struct sockaddr *addr, socklen_t addr_len)
1743 {
1744   PFS_socket_class *klass;
1745   PFS_socket *pfs;
1746   klass= find_socket_class(key);
1747   if (unlikely(klass == NULL))
1748     return NULL;
1749   if (! klass->m_enabled)
1750     return NULL;
1751   pfs= create_socket(klass, fd, addr, addr_len);
1752   return reinterpret_cast<PSI_socket *> (pfs);
1753 }
1754 
destroy_socket_v1(PSI_socket * socket)1755 static void destroy_socket_v1(PSI_socket *socket)
1756 {
1757   PFS_socket *pfs= reinterpret_cast<PFS_socket*> (socket);
1758 
1759   DBUG_ASSERT(pfs != NULL);
1760 
1761   destroy_socket(pfs);
1762 }
1763 
1764 /**
1765   Implementation of the file instrumentation interface.
1766   @sa PSI_v1::create_file.
1767 */
create_file_v1(PSI_file_key key,const char * name,File file)1768 static void create_file_v1(PSI_file_key key, const char *name, File file)
1769 {
1770   if (psi_unlikely(! flag_global_instrumentation))
1771     return;
1772   int index= (int) file;
1773   if (unlikely(index < 0))
1774     return;
1775   PFS_file_class *klass= find_file_class(key);
1776   if (unlikely(klass == NULL))
1777     return;
1778   if (! klass->m_enabled)
1779     return;
1780 
1781   /* A thread is needed for LF_PINS */
1782   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1783   if (unlikely(pfs_thread == NULL))
1784     return;
1785 
1786   if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
1787     return;
1788 
1789   /*
1790     We want this check after pfs_thread->m_enabled,
1791     to avoid reporting false loss.
1792   */
1793   if (unlikely(index >= file_handle_max))
1794   {
1795     file_handle_lost++;
1796     return;
1797   }
1798 
1799   uint len= (uint)strlen(name);
1800   PFS_file *pfs_file= find_or_create_file(pfs_thread, klass, name, len, true);
1801 
1802   file_handle_array[index]= pfs_file;
1803 }
1804 
/**
  Arguments given from a parent to a child thread, packaged in one structure.
  This data is used when spawning a new instrumented thread.
  It is allocated and filled by spawn_thread_v1(), and consumed, then
  freed, by pfs_spawn_thread().
  @sa pfs_spawn_thread.
*/
struct PFS_spawn_thread_arg
{
  /** Internal id of the parent thread, 0 when the parent is not instrumented. */
  ulonglong m_thread_internal_id;
  /** Copy of the parent user name; only set when the parent is instrumented. */
  char m_username[USERNAME_LENGTH];
  /** Length of m_username, 0 when the parent is not instrumented. */
  uint m_username_length;
  /** Copy of the parent host name; only set when the parent is instrumented. */
  char m_hostname[HOSTNAME_LENGTH];
  /** Length of m_hostname, 0 when the parent is not instrumented. */
  uint m_hostname_length;

  /** Instrument key of the child thread, resolved with find_thread_class(). */
  PSI_thread_key m_child_key;
  /** Identity given to create_thread() for the child thread. */
  const void *m_child_identity;
  /** Start routine provided by the caller of spawn_thread_v1(). */
  void *(*m_user_start_routine)(void*);
  /** Argument passed to m_user_start_routine. */
  void *m_user_arg;
};
1823 
pfs_spawn_thread(void * arg)1824 void* pfs_spawn_thread(void *arg)
1825 {
1826   PFS_spawn_thread_arg *typed_arg= (PFS_spawn_thread_arg*) arg;
1827   void *user_arg;
1828   void *(*user_start_routine)(void*);
1829 
1830   PFS_thread *pfs;
1831 
1832   /* First, attach instrumentation to this newly created pthread. */
1833   PFS_thread_class *klass= find_thread_class(typed_arg->m_child_key);
1834   if (likely(klass != NULL))
1835   {
1836     pfs= create_thread(klass, typed_arg->m_child_identity, 0);
1837     if (likely(pfs != NULL))
1838     {
1839       clear_thread_account(pfs);
1840 
1841       pfs->m_parent_thread_internal_id= typed_arg->m_thread_internal_id;
1842 
1843       memcpy(pfs->m_username, typed_arg->m_username, sizeof(pfs->m_username));
1844       pfs->m_username_length= typed_arg->m_username_length;
1845 
1846       memcpy(pfs->m_hostname, typed_arg->m_hostname, sizeof(pfs->m_hostname));
1847       pfs->m_hostname_length= typed_arg->m_hostname_length;
1848 
1849       set_thread_account(pfs);
1850     }
1851   }
1852   else
1853   {
1854     pfs= NULL;
1855   }
1856   my_pthread_setspecific_ptr(THR_PFS, pfs);
1857 
1858   /*
1859     Secondly, free the memory allocated in spawn_thread_v1().
1860     It is preferable to do this before invoking the user
1861     routine, to avoid memory leaks at shutdown, in case
1862     the server exits without waiting for this thread.
1863   */
1864   user_start_routine= typed_arg->m_user_start_routine;
1865   user_arg= typed_arg->m_user_arg;
1866   my_free(typed_arg);
1867 
1868   /* Then, execute the user code for this thread. */
1869   (*user_start_routine)(user_arg);
1870 
1871   return NULL;
1872 }
1873 
1874 /**
1875   Implementation of the thread instrumentation interface.
1876   @sa PSI_v1::spawn_thread.
1877 */
spawn_thread_v1(PSI_thread_key key,pthread_t * thread,const pthread_attr_t * attr,void * (* start_routine)(void *),void * arg)1878 static int spawn_thread_v1(PSI_thread_key key,
1879                            pthread_t *thread, const pthread_attr_t *attr,
1880                            void *(*start_routine)(void*), void *arg)
1881 {
1882   PFS_spawn_thread_arg *psi_arg;
1883   PFS_thread *parent;
1884 
1885   /* psi_arg can not be global, and can not be a local variable. */
1886   psi_arg= (PFS_spawn_thread_arg*) my_malloc(sizeof(PFS_spawn_thread_arg),
1887                                              MYF(MY_WME));
1888   if (unlikely(psi_arg == NULL))
1889     return EAGAIN;
1890 
1891   psi_arg->m_child_key= key;
1892   psi_arg->m_child_identity= (arg ? arg : thread);
1893   psi_arg->m_user_start_routine= start_routine;
1894   psi_arg->m_user_arg= arg;
1895 
1896   parent= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1897   if (parent != NULL)
1898   {
1899     /*
1900       Make a copy of the parent attributes.
1901       This is required, because instrumentation for this thread (the parent)
1902       may be destroyed before the child thread instrumentation is created.
1903     */
1904     psi_arg->m_thread_internal_id= parent->m_thread_internal_id;
1905 
1906     memcpy(psi_arg->m_username, parent->m_username, sizeof(psi_arg->m_username));
1907     psi_arg->m_username_length= parent->m_username_length;
1908 
1909     memcpy(psi_arg->m_hostname, parent->m_hostname, sizeof(psi_arg->m_hostname));
1910     psi_arg->m_hostname_length= parent->m_hostname_length;
1911   }
1912   else
1913   {
1914     psi_arg->m_thread_internal_id= 0;
1915     psi_arg->m_username_length= 0;
1916     psi_arg->m_hostname_length= 0;
1917   }
1918 
1919   int result= pthread_create(thread, attr, pfs_spawn_thread, psi_arg);
1920   if (unlikely(result != 0))
1921     my_free(psi_arg);
1922   return result;
1923 }
1924 
1925 /**
1926   Implementation of the thread instrumentation interface.
1927   @sa PSI_v1::new_thread.
1928 */
1929 static PSI_thread*
new_thread_v1(PSI_thread_key key,const void * identity,ulonglong processlist_id)1930 new_thread_v1(PSI_thread_key key, const void *identity, ulonglong processlist_id)
1931 {
1932   PFS_thread *pfs;
1933 
1934   PFS_thread_class *klass= find_thread_class(key);
1935   if (likely(klass != NULL))
1936     pfs= create_thread(klass, identity, processlist_id);
1937   else
1938     pfs= NULL;
1939 
1940   return reinterpret_cast<PSI_thread*> (pfs);
1941 }
1942 
1943 /**
1944   Implementation of the thread instrumentation interface.
1945   @sa PSI_v1::set_thread_id.
1946 */
set_thread_id_v1(PSI_thread * thread,ulonglong processlist_id)1947 static void set_thread_id_v1(PSI_thread *thread, ulonglong processlist_id)
1948 {
1949   PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
1950   if (unlikely(pfs == NULL))
1951     return;
1952   pfs->m_processlist_id= (ulong)processlist_id;
1953 }
1954 
1955 /**
1956   Implementation of the thread instrumentation interface.
1957   @sa PSI_v1::get_thread_id.
1958 */
1959 static PSI_thread*
get_thread_v1(void)1960 get_thread_v1(void)
1961 {
1962   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1963   return reinterpret_cast<PSI_thread*> (pfs);
1964 }
1965 
1966 /**
1967   Implementation of the thread instrumentation interface.
1968   @sa PSI_v1::set_thread_user.
1969 */
set_thread_user_v1(const char * user,int user_len)1970 static void set_thread_user_v1(const char *user, int user_len)
1971 {
1972   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1973 
1974   DBUG_ASSERT((user != NULL) || (user_len == 0));
1975   DBUG_ASSERT(user_len >= 0);
1976   DBUG_ASSERT((uint) user_len <= sizeof(pfs->m_username));
1977 
1978   if (unlikely(pfs == NULL))
1979     return;
1980 
1981   aggregate_thread(pfs, pfs->m_account, pfs->m_user, pfs->m_host);
1982 
1983   pfs->m_session_lock.allocated_to_dirty();
1984 
1985   clear_thread_account(pfs);
1986 
1987   if (user_len > 0)
1988     memcpy(pfs->m_username, user, user_len);
1989   pfs->m_username_length= user_len;
1990 
1991   set_thread_account(pfs);
1992 
1993   bool enabled= true;
1994   if (flag_thread_instrumentation)
1995   {
1996     if ((pfs->m_username_length > 0) && (pfs->m_hostname_length > 0))
1997     {
1998       /*
1999         TODO: performance improvement.
2000         Once performance_schema.USERS is exposed,
2001         we can use PFS_user::m_enabled instead of looking up
2002         SETUP_ACTORS every time.
2003       */
2004       lookup_setup_actor(pfs,
2005                          pfs->m_username, pfs->m_username_length,
2006                          pfs->m_hostname, pfs->m_hostname_length,
2007                          &enabled);
2008     }
2009   }
2010 
2011   pfs->m_enabled= enabled;
2012 
2013   pfs->m_session_lock.dirty_to_allocated();
2014 }
2015 
2016 /**
2017   Implementation of the thread instrumentation interface.
2018   @sa PSI_v1::set_thread_account.
2019 */
set_thread_account_v1(const char * user,int user_len,const char * host,int host_len)2020 static void set_thread_account_v1(const char *user, int user_len,
2021                                     const char *host, int host_len)
2022 {
2023   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2024 
2025   DBUG_ASSERT((user != NULL) || (user_len == 0));
2026   DBUG_ASSERT(user_len >= 0);
2027   DBUG_ASSERT((uint) user_len <= sizeof(pfs->m_username));
2028   DBUG_ASSERT((host != NULL) || (host_len == 0));
2029   DBUG_ASSERT(host_len >= 0);
2030 
2031   host_len= MY_MIN(host_len, static_cast<int>(sizeof(pfs->m_hostname)));
2032 
2033   if (unlikely(pfs == NULL))
2034     return;
2035 
2036   pfs->m_session_lock.allocated_to_dirty();
2037 
2038   clear_thread_account(pfs);
2039 
2040   if (host_len > 0)
2041     memcpy(pfs->m_hostname, host, host_len);
2042   pfs->m_hostname_length= host_len;
2043 
2044   if (user_len > 0)
2045     memcpy(pfs->m_username, user, user_len);
2046   pfs->m_username_length= user_len;
2047 
2048   set_thread_account(pfs);
2049 
2050   bool enabled= true;
2051   if (flag_thread_instrumentation)
2052   {
2053     if ((pfs->m_username_length > 0) && (pfs->m_hostname_length > 0))
2054     {
2055       /*
2056         TODO: performance improvement.
2057         Once performance_schema.USERS is exposed,
2058         we can use PFS_user::m_enabled instead of looking up
2059         SETUP_ACTORS every time.
2060       */
2061       lookup_setup_actor(pfs,
2062                          pfs->m_username, pfs->m_username_length,
2063                          pfs->m_hostname, pfs->m_hostname_length,
2064                          &enabled);
2065     }
2066   }
2067   pfs->m_enabled= enabled;
2068 
2069   pfs->m_session_lock.dirty_to_allocated();
2070 }
2071 
2072 /**
2073   Implementation of the thread instrumentation interface.
2074   @sa PSI_v1::set_thread_db.
2075 */
set_thread_db_v1(const char * db,int db_len)2076 static void set_thread_db_v1(const char* db, int db_len)
2077 {
2078   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2079 
2080   DBUG_ASSERT((db != NULL) || (db_len == 0));
2081   DBUG_ASSERT(db_len >= 0);
2082   DBUG_ASSERT((uint) db_len <= sizeof(pfs->m_dbname));
2083 
2084   if (likely(pfs != NULL))
2085   {
2086     pfs->m_stmt_lock.allocated_to_dirty();
2087     if (db_len > 0)
2088       memcpy(pfs->m_dbname, db, db_len);
2089     pfs->m_dbname_length= db_len;
2090     pfs->m_stmt_lock.dirty_to_allocated();
2091   }
2092 }
2093 
2094 /**
2095   Implementation of the thread instrumentation interface.
2096   @sa PSI_v1::set_thread_command.
2097 */
set_thread_command_v1(int command)2098 static void set_thread_command_v1(int command)
2099 {
2100   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2101 
2102   DBUG_ASSERT(command >= 0);
2103   DBUG_ASSERT(command <= (int) COM_END);
2104 
2105   if (likely(pfs != NULL))
2106   {
2107     pfs->m_command= command;
2108   }
2109 }
2110 
2111 /**
2112   Implementation of the thread instrumentation interface.
2113   @sa PSI_v1::set_thread_start_time.
2114 */
set_thread_start_time_v1(time_t start_time)2115 static void set_thread_start_time_v1(time_t start_time)
2116 {
2117   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2118 
2119   if (likely(pfs != NULL))
2120   {
2121     pfs->m_start_time= start_time;
2122   }
2123 }
2124 
/**
  Implementation of the thread instrumentation interface.
  Deprecated entry point: the state given is ignored.
  @sa PSI_v1::set_thread_state.
  @param state  ignored
*/
static void
set_thread_state_v1(const char *state)
{
  /* DEPRECATED. Nothing is recorded. */
  (void) state;
}
2133 
2134 /**
2135   Implementation of the thread instrumentation interface.
2136   @sa PSI_v1::set_thread_info.
2137 */
set_thread_info_v1(const char * info,uint info_len)2138 static void set_thread_info_v1(const char* info, uint info_len)
2139 {
2140   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2141 
2142   DBUG_ASSERT((info != NULL) || (info_len == 0));
2143 
2144   if (likely(pfs != NULL))
2145   {
2146     if ((info != NULL) && (info_len > 0))
2147     {
2148       if (info_len > sizeof(pfs->m_processlist_info))
2149         info_len= sizeof(pfs->m_processlist_info);
2150 
2151       pfs->m_stmt_lock.allocated_to_dirty();
2152       memcpy(pfs->m_processlist_info, info, info_len);
2153       pfs->m_processlist_info_length= info_len;
2154       pfs->m_stmt_lock.dirty_to_allocated();
2155     }
2156     else
2157     {
2158       pfs->m_stmt_lock.allocated_to_dirty();
2159       pfs->m_processlist_info_length= 0;
2160       pfs->m_stmt_lock.dirty_to_allocated();
2161     }
2162   }
2163 }
2164 
2165 /**
2166   Implementation of the thread instrumentation interface.
2167   @sa PSI_v1::set_thread.
2168 */
set_thread_v1(PSI_thread * thread)2169 static void set_thread_v1(PSI_thread* thread)
2170 {
2171   PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
2172   my_pthread_setspecific_ptr(THR_PFS, pfs);
2173 }
2174 
2175 /**
2176   Implementation of the thread instrumentation interface.
2177   @sa PSI_v1::delete_current_thread.
2178 */
delete_current_thread_v1(void)2179 static void delete_current_thread_v1(void)
2180 {
2181   PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2182   if (thread != NULL)
2183   {
2184     aggregate_thread(thread, thread->m_account, thread->m_user, thread->m_host);
2185     my_pthread_setspecific_ptr(THR_PFS, NULL);
2186     destroy_thread(thread);
2187   }
2188 }
2189 
2190 /**
2191   Implementation of the thread instrumentation interface.
2192   @sa PSI_v1::delete_thread.
2193 */
delete_thread_v1(PSI_thread * thread)2194 static void delete_thread_v1(PSI_thread *thread)
2195 {
2196   PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
2197 
2198   if (pfs != NULL)
2199   {
2200     aggregate_thread(pfs, pfs->m_account, pfs->m_user, pfs->m_host);
2201     destroy_thread(pfs);
2202   }
2203 }
2204 
2205 /**
2206   Implementation of the mutex instrumentation interface.
2207   @sa PSI_v1::start_mutex_wait.
2208 */
2209 static PSI_mutex_locker*
start_mutex_wait_v1(PSI_mutex_locker_state * state,PSI_mutex * mutex,PSI_mutex_operation op,const char * src_file,uint src_line)2210 start_mutex_wait_v1(PSI_mutex_locker_state *state,
2211                     PSI_mutex *mutex, PSI_mutex_operation op,
2212                     const char *src_file, uint src_line)
2213 {
2214   PFS_mutex *pfs_mutex= reinterpret_cast<PFS_mutex*> (mutex);
2215   DBUG_ASSERT((int) op >= 0);
2216   DBUG_ASSERT((uint) op < array_elements(mutex_operation_map));
2217   DBUG_ASSERT(state != NULL);
2218 
2219   DBUG_ASSERT(pfs_mutex != NULL);
2220   DBUG_ASSERT(pfs_mutex->m_class != NULL);
2221 
2222   if (! pfs_mutex->m_enabled)
2223     return NULL;
2224 
2225   uint flags;
2226   ulonglong timer_start= 0;
2227 
2228   if (flag_thread_instrumentation)
2229   {
2230     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2231     if (unlikely(pfs_thread == NULL))
2232       return NULL;
2233     if (! pfs_thread->m_enabled)
2234       return NULL;
2235     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2236     flags= STATE_FLAG_THREAD;
2237 
2238     if (pfs_mutex->m_timed)
2239     {
2240       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2241       state->m_timer_start= timer_start;
2242       flags|= STATE_FLAG_TIMED;
2243     }
2244 
2245     if (flag_events_waits_current)
2246     {
2247       if (unlikely(pfs_thread->m_events_waits_current >=
2248                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2249       {
2250         locker_lost++;
2251         return NULL;
2252       }
2253       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2254       state->m_wait= wait;
2255       flags|= STATE_FLAG_EVENT;
2256 
2257       PFS_events_waits *parent_event= wait - 1;
2258       wait->m_event_type= EVENT_TYPE_WAIT;
2259       wait->m_nesting_event_id= parent_event->m_event_id;
2260       wait->m_nesting_event_type= parent_event->m_event_type;
2261 
2262       wait->m_thread= pfs_thread;
2263       wait->m_class= pfs_mutex->m_class;
2264       wait->m_timer_start= timer_start;
2265       wait->m_timer_end= 0;
2266       wait->m_object_instance_addr= pfs_mutex->m_identity;
2267       wait->m_event_id= pfs_thread->m_event_id++;
2268       wait->m_end_event_id= 0;
2269       wait->m_operation= mutex_operation_map[(int) op];
2270       wait->m_source_file= src_file;
2271       wait->m_source_line= src_line;
2272       wait->m_wait_class= WAIT_CLASS_MUTEX;
2273 
2274       pfs_thread->m_events_waits_current++;
2275     }
2276   }
2277   else
2278   {
2279     if (pfs_mutex->m_timed)
2280     {
2281       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2282       state->m_timer_start= timer_start;
2283       flags= STATE_FLAG_TIMED;
2284       state->m_thread= NULL;
2285     }
2286     else
2287     {
2288       /*
2289         Complete shortcut.
2290       */
2291       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
2292       pfs_mutex->m_mutex_stat.m_wait_stat.aggregate_counted();
2293       return NULL;
2294     }
2295   }
2296 
2297   state->m_flags= flags;
2298   state->m_mutex= mutex;
2299   return reinterpret_cast<PSI_mutex_locker*> (state);
2300 }
2301 
2302 /**
2303   Implementation of the rwlock instrumentation interface.
2304   @sa PSI_v1::start_rwlock_rdwait
2305   @sa PSI_v1::start_rwlock_wrwait
2306 */
2307 static PSI_rwlock_locker*
start_rwlock_wait_v1(PSI_rwlock_locker_state * state,PSI_rwlock * rwlock,PSI_rwlock_operation op,const char * src_file,uint src_line)2308 start_rwlock_wait_v1(PSI_rwlock_locker_state *state,
2309                      PSI_rwlock *rwlock,
2310                      PSI_rwlock_operation op,
2311                      const char *src_file, uint src_line)
2312 {
2313   PFS_rwlock *pfs_rwlock= reinterpret_cast<PFS_rwlock*> (rwlock);
2314   DBUG_ASSERT(static_cast<int> (op) >= 0);
2315   DBUG_ASSERT(static_cast<uint> (op) < array_elements(rwlock_operation_map));
2316   DBUG_ASSERT(state != NULL);
2317   DBUG_ASSERT(pfs_rwlock != NULL);
2318   DBUG_ASSERT(pfs_rwlock->m_class != NULL);
2319 
2320   if (! pfs_rwlock->m_enabled)
2321     return NULL;
2322 
2323   uint flags;
2324   ulonglong timer_start= 0;
2325 
2326   if (flag_thread_instrumentation)
2327   {
2328     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2329     if (unlikely(pfs_thread == NULL))
2330       return NULL;
2331     if (! pfs_thread->m_enabled)
2332       return NULL;
2333     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2334     flags= STATE_FLAG_THREAD;
2335 
2336     if (pfs_rwlock->m_timed)
2337     {
2338       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2339       state->m_timer_start= timer_start;
2340       flags|= STATE_FLAG_TIMED;
2341     }
2342 
2343     if (flag_events_waits_current)
2344     {
2345       if (unlikely(pfs_thread->m_events_waits_current >=
2346                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2347       {
2348         locker_lost++;
2349         return NULL;
2350       }
2351       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2352       state->m_wait= wait;
2353       flags|= STATE_FLAG_EVENT;
2354 
2355       PFS_events_waits *parent_event= wait - 1;
2356       wait->m_event_type= EVENT_TYPE_WAIT;
2357       wait->m_nesting_event_id= parent_event->m_event_id;
2358       wait->m_nesting_event_type= parent_event->m_event_type;
2359 
2360       wait->m_thread= pfs_thread;
2361       wait->m_class= pfs_rwlock->m_class;
2362       wait->m_timer_start= timer_start;
2363       wait->m_timer_end= 0;
2364       wait->m_object_instance_addr= pfs_rwlock->m_identity;
2365       wait->m_event_id= pfs_thread->m_event_id++;
2366       wait->m_end_event_id= 0;
2367       wait->m_operation= rwlock_operation_map[static_cast<int> (op)];
2368       wait->m_source_file= src_file;
2369       wait->m_source_line= src_line;
2370       wait->m_wait_class= WAIT_CLASS_RWLOCK;
2371 
2372       pfs_thread->m_events_waits_current++;
2373     }
2374   }
2375   else
2376   {
2377     if (pfs_rwlock->m_timed)
2378     {
2379       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2380       state->m_timer_start= timer_start;
2381       flags= STATE_FLAG_TIMED;
2382       state->m_thread= NULL;
2383     }
2384     else
2385     {
2386       /*
2387         Complete shortcut.
2388       */
2389       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
2390       pfs_rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
2391       return NULL;
2392     }
2393   }
2394 
2395   state->m_flags= flags;
2396   state->m_rwlock= rwlock;
2397   return reinterpret_cast<PSI_rwlock_locker*> (state);
2398 }
2399 
2400 /**
2401   Implementation of the cond instrumentation interface.
2402   @sa PSI_v1::start_cond_wait.
2403 */
2404 static PSI_cond_locker*
start_cond_wait_v1(PSI_cond_locker_state * state,PSI_cond * cond,PSI_mutex * mutex,PSI_cond_operation op,const char * src_file,uint src_line)2405 start_cond_wait_v1(PSI_cond_locker_state *state,
2406                    PSI_cond *cond, PSI_mutex *mutex,
2407                    PSI_cond_operation op,
2408                    const char *src_file, uint src_line)
2409 {
2410   /*
2411     Note about the unused PSI_mutex *mutex parameter:
2412     In the pthread library, a call to pthread_cond_wait()
2413     causes an unlock() + lock() on the mutex associated with the condition.
2414     This mutex operation is not instrumented, so the mutex will still
2415     appear as locked when a thread is waiting on a condition.
2416     This has no impact now, as unlock_mutex() is not recording events.
2417     When unlock_mutex() is implemented by later work logs,
2418     this parameter here will be used to adjust the mutex state,
2419     in start_cond_wait_v1() and end_cond_wait_v1().
2420   */
2421   PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
2422   DBUG_ASSERT(static_cast<int> (op) >= 0);
2423   DBUG_ASSERT(static_cast<uint> (op) < array_elements(cond_operation_map));
2424   DBUG_ASSERT(state != NULL);
2425   DBUG_ASSERT(pfs_cond != NULL);
2426   DBUG_ASSERT(pfs_cond->m_class != NULL);
2427 
2428   if (! pfs_cond->m_enabled)
2429     return NULL;
2430 
2431   uint flags;
2432   ulonglong timer_start= 0;
2433 
2434   if (flag_thread_instrumentation)
2435   {
2436     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2437     if (unlikely(pfs_thread == NULL))
2438       return NULL;
2439     if (! pfs_thread->m_enabled)
2440       return NULL;
2441     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2442     flags= STATE_FLAG_THREAD;
2443 
2444     if (pfs_cond->m_timed)
2445     {
2446       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2447       state->m_timer_start= timer_start;
2448       flags|= STATE_FLAG_TIMED;
2449     }
2450 
2451     if (flag_events_waits_current)
2452     {
2453       if (unlikely(pfs_thread->m_events_waits_current >=
2454                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2455       {
2456         locker_lost++;
2457         return NULL;
2458       }
2459       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2460       state->m_wait= wait;
2461       flags|= STATE_FLAG_EVENT;
2462 
2463       PFS_events_waits *parent_event= wait - 1;
2464       wait->m_event_type= EVENT_TYPE_WAIT;
2465       wait->m_nesting_event_id= parent_event->m_event_id;
2466       wait->m_nesting_event_type= parent_event->m_event_type;
2467 
2468       wait->m_thread= pfs_thread;
2469       wait->m_class= pfs_cond->m_class;
2470       wait->m_timer_start= timer_start;
2471       wait->m_timer_end= 0;
2472       wait->m_object_instance_addr= pfs_cond->m_identity;
2473       wait->m_event_id= pfs_thread->m_event_id++;
2474       wait->m_end_event_id= 0;
2475       wait->m_operation= cond_operation_map[static_cast<int> (op)];
2476       wait->m_source_file= src_file;
2477       wait->m_source_line= src_line;
2478       wait->m_wait_class= WAIT_CLASS_COND;
2479 
2480       pfs_thread->m_events_waits_current++;
2481     }
2482   }
2483   else
2484   {
2485     if (pfs_cond->m_timed)
2486     {
2487       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2488       state->m_timer_start= timer_start;
2489       flags= STATE_FLAG_TIMED;
2490     }
2491     else
2492     {
2493       /*
2494         Complete shortcut.
2495       */
2496       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
2497       pfs_cond->m_cond_stat.m_wait_stat.aggregate_counted();
2498       return NULL;
2499     }
2500   }
2501 
2502   state->m_flags= flags;
2503   state->m_cond= cond;
2504   state->m_mutex= mutex;
2505   return reinterpret_cast<PSI_cond_locker*> (state);
2506 }
2507 
lock_flags_to_lock_type(uint flags)2508 static inline PFS_TL_LOCK_TYPE lock_flags_to_lock_type(uint flags)
2509 {
2510   enum thr_lock_type value= static_cast<enum thr_lock_type> (flags);
2511 
2512   switch (value)
2513   {
2514     case TL_READ:
2515       return PFS_TL_READ;
2516     case TL_READ_WITH_SHARED_LOCKS:
2517       return PFS_TL_READ_WITH_SHARED_LOCKS;
2518     case TL_READ_HIGH_PRIORITY:
2519       return PFS_TL_READ_HIGH_PRIORITY;
2520     case TL_READ_NO_INSERT:
2521       return PFS_TL_READ_NO_INSERT;
2522     case TL_WRITE_ALLOW_WRITE:
2523       return PFS_TL_WRITE_ALLOW_WRITE;
2524     case TL_WRITE_CONCURRENT_INSERT:
2525       return PFS_TL_WRITE_CONCURRENT_INSERT;
2526     case TL_WRITE_DELAYED:
2527       return PFS_TL_WRITE_DELAYED;
2528     case TL_WRITE_LOW_PRIORITY:
2529       return PFS_TL_WRITE_LOW_PRIORITY;
2530     case TL_WRITE:
2531       return PFS_TL_WRITE;
2532 
2533     case TL_WRITE_ONLY:
2534     case TL_IGNORE:
2535     case TL_UNLOCK:
2536     case TL_READ_DEFAULT:
2537     case TL_WRITE_DEFAULT:
2538     default:
2539       DBUG_ASSERT(false);
2540   }
2541 
2542   /* Dead code */
2543   return PFS_TL_READ;
2544 }
2545 
external_lock_flags_to_lock_type(uint flags)2546 static inline PFS_TL_LOCK_TYPE external_lock_flags_to_lock_type(uint flags)
2547 {
2548   DBUG_ASSERT(flags == F_RDLCK || flags == F_WRLCK);
2549   return (flags == F_RDLCK ? PFS_TL_READ_EXTERNAL : PFS_TL_WRITE_EXTERNAL);
2550 }
2551 
2552 /**
2553   Implementation of the table instrumentation interface.
2554   @sa PSI_v1::start_table_io_wait_v1
2555 */
2556 static PSI_table_locker*
start_table_io_wait_v1(PSI_table_locker_state * state,PSI_table * table,PSI_table_io_operation op,uint index,const char * src_file,uint src_line)2557 start_table_io_wait_v1(PSI_table_locker_state *state,
2558                        PSI_table *table,
2559                        PSI_table_io_operation op,
2560                        uint index,
2561                        const char *src_file, uint src_line)
2562 {
2563   DBUG_ASSERT(static_cast<int> (op) >= 0);
2564   DBUG_ASSERT(static_cast<uint> (op) < array_elements(table_io_operation_map));
2565   DBUG_ASSERT(state != NULL);
2566   PFS_table *pfs_table= reinterpret_cast<PFS_table*> (table);
2567   DBUG_ASSERT(pfs_table != NULL);
2568   DBUG_ASSERT(pfs_table->m_share != NULL);
2569 
2570   if (! pfs_table->m_io_enabled)
2571     return NULL;
2572 
2573   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2574 
2575   uint flags;
2576   ulonglong timer_start= 0;
2577 
2578   if (flag_thread_instrumentation)
2579   {
2580     if (pfs_thread == NULL)
2581       return NULL;
2582     if (! pfs_thread->m_enabled)
2583       return NULL;
2584     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2585     flags= STATE_FLAG_THREAD;
2586 
2587     if (pfs_table->m_io_timed)
2588     {
2589       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2590       state->m_timer_start= timer_start;
2591       flags|= STATE_FLAG_TIMED;
2592     }
2593 
2594     if (flag_events_waits_current)
2595     {
2596       if (unlikely(pfs_thread->m_events_waits_current >=
2597                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2598       {
2599         locker_lost++;
2600         return NULL;
2601       }
2602       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2603       state->m_wait= wait;
2604       flags|= STATE_FLAG_EVENT;
2605 
2606       PFS_events_waits *parent_event= wait - 1;
2607       wait->m_event_type= EVENT_TYPE_WAIT;
2608       wait->m_nesting_event_id= parent_event->m_event_id;
2609       wait->m_nesting_event_type= parent_event->m_event_type;
2610 
2611       PFS_table_share *share= pfs_table->m_share;
2612       wait->m_thread= pfs_thread;
2613       wait->m_class= &global_table_io_class;
2614       wait->m_timer_start= timer_start;
2615       wait->m_timer_end= 0;
2616       wait->m_object_instance_addr= pfs_table->m_identity;
2617       wait->m_event_id= pfs_thread->m_event_id++;
2618       wait->m_end_event_id= 0;
2619       wait->m_operation= table_io_operation_map[static_cast<int> (op)];
2620       wait->m_flags= 0;
2621       wait->m_object_type= share->get_object_type();
2622       wait->m_weak_table_share= share;
2623       wait->m_weak_version= share->get_version();
2624       wait->m_index= index;
2625       wait->m_source_file= src_file;
2626       wait->m_source_line= src_line;
2627       wait->m_wait_class= WAIT_CLASS_TABLE;
2628 
2629       pfs_thread->m_events_waits_current++;
2630     }
2631   }
2632   else
2633   {
2634     if (pfs_table->m_io_timed)
2635     {
2636       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2637       state->m_timer_start= timer_start;
2638       flags= STATE_FLAG_TIMED;
2639     }
2640     else
2641     {
2642       /* TODO: consider a shortcut here */
2643       flags= 0;
2644     }
2645   }
2646 
2647   state->m_flags= flags;
2648   state->m_table= table;
2649   state->m_io_operation= op;
2650   state->m_index= index;
2651   return reinterpret_cast<PSI_table_locker*> (state);
2652 }
2653 
2654 /**
2655   Implementation of the table instrumentation interface.
2656   @sa PSI_v1::start_table_lock_wait.
2657 */
2658 static PSI_table_locker*
start_table_lock_wait_v1(PSI_table_locker_state * state,PSI_table * table,PSI_table_lock_operation op,ulong op_flags,const char * src_file,uint src_line)2659 start_table_lock_wait_v1(PSI_table_locker_state *state,
2660                          PSI_table *table,
2661                          PSI_table_lock_operation op,
2662                          ulong op_flags,
2663                          const char *src_file, uint src_line)
2664 {
2665   DBUG_ASSERT(state != NULL);
2666   DBUG_ASSERT((op == PSI_TABLE_LOCK) || (op == PSI_TABLE_EXTERNAL_LOCK));
2667 
2668   PFS_table *pfs_table= reinterpret_cast<PFS_table*> (table);
2669 
2670   DBUG_ASSERT(pfs_table != NULL);
2671   DBUG_ASSERT(pfs_table->m_share != NULL);
2672 
2673   if (! pfs_table->m_lock_enabled)
2674     return NULL;
2675 
2676   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2677 
2678   PFS_TL_LOCK_TYPE lock_type;
2679 
2680   switch (op)
2681   {
2682     case PSI_TABLE_LOCK:
2683       lock_type= lock_flags_to_lock_type(op_flags);
2684       break;
2685     case PSI_TABLE_EXTERNAL_LOCK:
2686       /*
2687         See the handler::external_lock() API design,
2688         there is no handler::external_unlock().
2689       */
2690       if (op_flags == F_UNLCK)
2691         return NULL;
2692       lock_type= external_lock_flags_to_lock_type(op_flags);
2693       break;
2694     default:
2695       lock_type= PFS_TL_READ;
2696       DBUG_ASSERT(false);
2697   }
2698 
2699   DBUG_ASSERT((uint) lock_type < array_elements(table_lock_operation_map));
2700 
2701   uint flags;
2702   ulonglong timer_start= 0;
2703 
2704   if (flag_thread_instrumentation)
2705   {
2706     if (pfs_thread == NULL)
2707       return NULL;
2708     if (! pfs_thread->m_enabled)
2709       return NULL;
2710     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2711     flags= STATE_FLAG_THREAD;
2712 
2713     if (pfs_table->m_lock_timed)
2714     {
2715       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2716       state->m_timer_start= timer_start;
2717       flags|= STATE_FLAG_TIMED;
2718     }
2719 
2720     if (flag_events_waits_current)
2721     {
2722       if (unlikely(pfs_thread->m_events_waits_current >=
2723                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2724       {
2725         locker_lost++;
2726         return NULL;
2727       }
2728       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2729       state->m_wait= wait;
2730       flags|= STATE_FLAG_EVENT;
2731 
2732       PFS_events_waits *parent_event= wait - 1;
2733       wait->m_event_type= EVENT_TYPE_WAIT;
2734       wait->m_nesting_event_id= parent_event->m_event_id;
2735       wait->m_nesting_event_type= parent_event->m_event_type;
2736 
2737       PFS_table_share *share= pfs_table->m_share;
2738       wait->m_thread= pfs_thread;
2739       wait->m_class= &global_table_lock_class;
2740       wait->m_timer_start= timer_start;
2741       wait->m_timer_end= 0;
2742       wait->m_object_instance_addr= pfs_table->m_identity;
2743       wait->m_event_id= pfs_thread->m_event_id++;
2744       wait->m_end_event_id= 0;
2745       wait->m_operation= table_lock_operation_map[lock_type];
2746       wait->m_flags= 0;
2747       wait->m_object_type= share->get_object_type();
2748       wait->m_weak_table_share= share;
2749       wait->m_weak_version= share->get_version();
2750       wait->m_index= 0;
2751       wait->m_source_file= src_file;
2752       wait->m_source_line= src_line;
2753       wait->m_wait_class= WAIT_CLASS_TABLE;
2754 
2755       pfs_thread->m_events_waits_current++;
2756     }
2757   }
2758   else
2759   {
2760     if (pfs_table->m_lock_timed)
2761     {
2762       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2763       state->m_timer_start= timer_start;
2764       flags= STATE_FLAG_TIMED;
2765     }
2766     else
2767     {
2768       /* TODO: consider a shortcut here */
2769       flags= 0;
2770     }
2771   }
2772 
2773   state->m_flags= flags;
2774   state->m_table= table;
2775   state->m_index= lock_type;
2776   return reinterpret_cast<PSI_table_locker*> (state);
2777 }
2778 
2779 /**
2780   Implementation of the file instrumentation interface.
2781   @sa PSI_v1::get_thread_file_name_locker.
2782 */
2783 static PSI_file_locker*
get_thread_file_name_locker_v1(PSI_file_locker_state * state,PSI_file_key key,PSI_file_operation op,const char * name,const void * identity)2784 get_thread_file_name_locker_v1(PSI_file_locker_state *state,
2785                                PSI_file_key key,
2786                                PSI_file_operation op,
2787                                const char *name, const void *identity)
2788 {
2789   DBUG_ASSERT(static_cast<int> (op) >= 0);
2790   DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
2791   DBUG_ASSERT(state != NULL);
2792 
2793   if (psi_unlikely(! flag_global_instrumentation))
2794     return NULL;
2795   PFS_file_class *klass= find_file_class(key);
2796   if (unlikely(klass == NULL))
2797     return NULL;
2798   if (! klass->m_enabled)
2799     return NULL;
2800 
2801   /* Needed for the LF_HASH */
2802   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2803   if (unlikely(pfs_thread == NULL))
2804     return NULL;
2805 
2806   if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
2807     return NULL;
2808 
2809   uint flags;
2810 
2811   state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2812   flags= STATE_FLAG_THREAD;
2813 
2814   if (klass->m_timed)
2815     flags|= STATE_FLAG_TIMED;
2816 
2817   if (flag_events_waits_current)
2818   {
2819     if (unlikely(pfs_thread->m_events_waits_current >=
2820                  & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2821     {
2822       locker_lost++;
2823       return NULL;
2824     }
2825     PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2826     state->m_wait= wait;
2827     flags|= STATE_FLAG_EVENT;
2828 
2829     PFS_events_waits *parent_event= wait - 1;
2830     wait->m_event_type= EVENT_TYPE_WAIT;
2831     wait->m_nesting_event_id= parent_event->m_event_id;
2832     wait->m_nesting_event_type= parent_event->m_event_type;
2833 
2834     wait->m_thread= pfs_thread;
2835     wait->m_class= klass;
2836     wait->m_timer_start= 0;
2837     wait->m_timer_end= 0;
2838     wait->m_object_instance_addr= NULL;
2839     wait->m_weak_file= NULL;
2840     wait->m_weak_version= 0;
2841     wait->m_event_id= pfs_thread->m_event_id++;
2842     wait->m_end_event_id= 0;
2843     wait->m_operation= file_operation_map[static_cast<int> (op)];
2844     wait->m_wait_class= WAIT_CLASS_FILE;
2845 
2846     pfs_thread->m_events_waits_current++;
2847   }
2848 
2849   state->m_flags= flags;
2850   state->m_file= NULL;
2851   state->m_name= name;
2852   state->m_class= klass;
2853   state->m_operation= op;
2854   return reinterpret_cast<PSI_file_locker*> (state);
2855 }
2856 
2857 /**
2858   Implementation of the file instrumentation interface.
2859   @sa PSI_v1::get_thread_file_stream_locker.
2860 */
2861 static PSI_file_locker*
get_thread_file_stream_locker_v1(PSI_file_locker_state * state,PSI_file * file,PSI_file_operation op)2862 get_thread_file_stream_locker_v1(PSI_file_locker_state *state,
2863                                  PSI_file *file, PSI_file_operation op)
2864 {
2865   PFS_file *pfs_file= reinterpret_cast<PFS_file*> (file);
2866   DBUG_ASSERT(static_cast<int> (op) >= 0);
2867   DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
2868   DBUG_ASSERT(state != NULL);
2869 
2870   if (unlikely(pfs_file == NULL))
2871     return NULL;
2872   DBUG_ASSERT(pfs_file->m_class != NULL);
2873   PFS_file_class *klass= pfs_file->m_class;
2874 
2875   if (! pfs_file->m_enabled)
2876     return NULL;
2877 
2878   uint flags;
2879 
2880   if (flag_thread_instrumentation)
2881   {
2882     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2883     if (unlikely(pfs_thread == NULL))
2884       return NULL;
2885     if (! pfs_thread->m_enabled)
2886       return NULL;
2887     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2888     flags= STATE_FLAG_THREAD;
2889 
2890     if (pfs_file->m_timed)
2891       flags|= STATE_FLAG_TIMED;
2892 
2893     if (flag_events_waits_current)
2894     {
2895       if (unlikely(pfs_thread->m_events_waits_current >=
2896                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2897       {
2898         locker_lost++;
2899         return NULL;
2900       }
2901       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2902       state->m_wait= wait;
2903       flags|= STATE_FLAG_EVENT;
2904 
2905       PFS_events_waits *parent_event= wait - 1;
2906       wait->m_event_type= EVENT_TYPE_WAIT;
2907       wait->m_nesting_event_id= parent_event->m_event_id;
2908       wait->m_nesting_event_type= parent_event->m_event_type;
2909 
2910       wait->m_thread= pfs_thread;
2911       wait->m_class= klass;
2912       wait->m_timer_start= 0;
2913       wait->m_timer_end= 0;
2914       wait->m_object_instance_addr= pfs_file;
2915       wait->m_weak_file= pfs_file;
2916       wait->m_weak_version= pfs_file->get_version();
2917       wait->m_event_id= pfs_thread->m_event_id++;
2918       wait->m_end_event_id= 0;
2919       wait->m_operation= file_operation_map[static_cast<int> (op)];
2920       wait->m_wait_class= WAIT_CLASS_FILE;
2921 
2922       pfs_thread->m_events_waits_current++;
2923     }
2924   }
2925   else
2926   {
2927     state->m_thread= NULL;
2928     if (pfs_file->m_timed)
2929     {
2930       flags= STATE_FLAG_TIMED;
2931     }
2932     else
2933     {
2934       /* TODO: consider a shortcut. */
2935       flags= 0;
2936     }
2937   }
2938 
2939   state->m_flags= flags;
2940   state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
2941   state->m_operation= op;
2942   state->m_name= NULL;
2943   state->m_class= klass;
2944   return reinterpret_cast<PSI_file_locker*> (state);
2945 }
2946 
2947 /**
2948   Implementation of the file instrumentation interface.
2949   @sa PSI_v1::get_thread_file_descriptor_locker.
2950 */
2951 static PSI_file_locker*
get_thread_file_descriptor_locker_v1(PSI_file_locker_state * state,File file,PSI_file_operation op)2952 get_thread_file_descriptor_locker_v1(PSI_file_locker_state *state,
2953                                      File file, PSI_file_operation op)
2954 {
2955   int index= static_cast<int> (file);
2956   DBUG_ASSERT(static_cast<int> (op) >= 0);
2957   DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
2958   DBUG_ASSERT(state != NULL);
2959 
2960   if (unlikely((index < 0) || (index >= file_handle_max)))
2961     return NULL;
2962 
2963   PFS_file *pfs_file= file_handle_array[index];
2964   if (unlikely(pfs_file == NULL))
2965     return NULL;
2966 
2967   /*
2968     We are about to close a file by descriptor number,
2969     and the calling code still holds the descriptor.
2970     Cleanup the file descriptor <--> file instrument association.
2971     Remove the instrumentation *before* the close to avoid race
2972     conditions with another thread opening a file
2973     (that could be given the same descriptor).
2974   */
2975   if (op == PSI_FILE_CLOSE)
2976     file_handle_array[index]= NULL;
2977 
2978   if (! pfs_file->m_enabled)
2979     return NULL;
2980 
2981   DBUG_ASSERT(pfs_file->m_class != NULL);
2982   PFS_file_class *klass= pfs_file->m_class;
2983 
2984   uint flags;
2985 
2986   if (flag_thread_instrumentation)
2987   {
2988     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2989     if (unlikely(pfs_thread == NULL))
2990       return NULL;
2991     if (! pfs_thread->m_enabled)
2992       return NULL;
2993     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2994     flags= STATE_FLAG_THREAD;
2995 
2996     if (pfs_file->m_timed)
2997       flags|= STATE_FLAG_TIMED;
2998 
2999     if (flag_events_waits_current)
3000     {
3001       if (unlikely(pfs_thread->m_events_waits_current >=
3002                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
3003       {
3004         locker_lost++;
3005         return NULL;
3006       }
3007       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
3008       state->m_wait= wait;
3009       flags|= STATE_FLAG_EVENT;
3010 
3011       PFS_events_waits *parent_event= wait - 1;
3012       wait->m_event_type= EVENT_TYPE_WAIT;
3013       wait->m_nesting_event_id= parent_event->m_event_id;
3014       wait->m_nesting_event_type= parent_event->m_event_type;
3015 
3016       wait->m_thread= pfs_thread;
3017       wait->m_class= klass;
3018       wait->m_timer_start= 0;
3019       wait->m_timer_end= 0;
3020       wait->m_object_instance_addr= pfs_file;
3021       wait->m_weak_file= pfs_file;
3022       wait->m_weak_version= pfs_file->get_version();
3023       wait->m_event_id= pfs_thread->m_event_id++;
3024       wait->m_end_event_id= 0;
3025       wait->m_operation= file_operation_map[static_cast<int> (op)];
3026       wait->m_wait_class= WAIT_CLASS_FILE;
3027 
3028       pfs_thread->m_events_waits_current++;
3029     }
3030   }
3031   else
3032   {
3033     state->m_thread= NULL;
3034     if (pfs_file->m_timed)
3035     {
3036       flags= STATE_FLAG_TIMED;
3037     }
3038     else
3039     {
3040       /* TODO: consider a shortcut. */
3041       flags= 0;
3042     }
3043   }
3044 
3045   state->m_flags= flags;
3046   state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
3047   state->m_operation= op;
3048   state->m_name= NULL;
3049   state->m_class= klass;
3050   return reinterpret_cast<PSI_file_locker*> (state);
3051 }
3052 
3053 /** Socket locker */
3054 
3055 static PSI_socket_locker*
start_socket_wait_v1(PSI_socket_locker_state * state,PSI_socket * socket,PSI_socket_operation op,size_t count,const char * src_file,uint src_line)3056 start_socket_wait_v1(PSI_socket_locker_state *state,
3057                      PSI_socket *socket,
3058                      PSI_socket_operation op,
3059                      size_t count,
3060                      const char *src_file, uint src_line)
3061 {
3062   DBUG_ASSERT(static_cast<int> (op) >= 0);
3063   DBUG_ASSERT(static_cast<uint> (op) < array_elements(socket_operation_map));
3064   DBUG_ASSERT(state != NULL);
3065   PFS_socket *pfs_socket= reinterpret_cast<PFS_socket*> (socket);
3066 
3067   DBUG_ASSERT(pfs_socket != NULL);
3068   DBUG_ASSERT(pfs_socket->m_class != NULL);
3069 
3070   if (!pfs_socket->m_enabled || pfs_socket->m_idle)
3071     return NULL;
3072 
3073   uint flags= 0;
3074   ulonglong timer_start= 0;
3075 
3076   if (flag_thread_instrumentation)
3077   {
3078     /*
3079        Do not use pfs_socket->m_thread_owner here,
3080        as different threads may use concurrently the same socket,
3081        for example during a KILL.
3082     */
3083     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
3084 
3085     if (unlikely(pfs_thread == NULL))
3086       return NULL;
3087 
3088     if (!pfs_thread->m_enabled)
3089       return NULL;
3090 
3091     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
3092     flags= STATE_FLAG_THREAD;
3093 
3094     if (pfs_socket->m_timed)
3095     {
3096       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
3097       state->m_timer_start= timer_start;
3098       flags|= STATE_FLAG_TIMED;
3099     }
3100 
3101     if (flag_events_waits_current)
3102     {
3103       if (unlikely(pfs_thread->m_events_waits_current >=
3104                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
3105       {
3106         locker_lost++;
3107         return NULL;
3108       }
3109       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
3110       state->m_wait= wait;
3111       flags|= STATE_FLAG_EVENT;
3112 
3113       PFS_events_waits *parent_event= wait - 1;
3114       wait->m_event_type= EVENT_TYPE_WAIT;
3115       wait->m_nesting_event_id=   parent_event->m_event_id;
3116       wait->m_nesting_event_type= parent_event->m_event_type;
3117       wait->m_thread=       pfs_thread;
3118       wait->m_class=        pfs_socket->m_class;
3119       wait->m_timer_start=  timer_start;
3120       wait->m_timer_end=    0;
3121       wait->m_object_instance_addr= pfs_socket->m_identity;
3122       wait->m_weak_socket=  pfs_socket;
3123       wait->m_weak_version= pfs_socket->get_version();
3124       wait->m_event_id=     pfs_thread->m_event_id++;
3125       wait->m_end_event_id= 0;
3126       wait->m_operation=    socket_operation_map[static_cast<int>(op)];
3127       wait->m_source_file= src_file;
3128       wait->m_source_line= src_line;
3129       wait->m_number_of_bytes= count;
3130       wait->m_wait_class=   WAIT_CLASS_SOCKET;
3131 
3132       pfs_thread->m_events_waits_current++;
3133     }
3134   }
3135   else
3136   {
3137     if (pfs_socket->m_timed)
3138     {
3139       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
3140       state->m_timer_start= timer_start;
3141       flags= STATE_FLAG_TIMED;
3142     }
3143     else
3144     {
3145       /*
3146         Even if timing is disabled, end_socket_wait() still needs a locker to
3147         capture the number of bytes sent or received by the socket operation.
3148         For operations that do not have a byte count, then just increment the
3149         event counter and return a NULL locker.
3150       */
3151       switch (op)
3152       {
3153         case PSI_SOCKET_CONNECT:
3154         case PSI_SOCKET_CREATE:
3155         case PSI_SOCKET_BIND:
3156         case PSI_SOCKET_SEEK:
3157         case PSI_SOCKET_OPT:
3158         case PSI_SOCKET_STAT:
3159         case PSI_SOCKET_SHUTDOWN:
3160         case PSI_SOCKET_CLOSE:
3161         case PSI_SOCKET_SELECT:
3162           pfs_socket->m_socket_stat.m_io_stat.m_misc.aggregate_counted();
3163           return NULL;
3164         default:
3165           break;
3166       }
3167     }
3168   }
3169 
3170   state->m_flags= flags;
3171   state->m_socket= socket;
3172   state->m_operation= op;
3173   return reinterpret_cast<PSI_socket_locker*> (state);
3174 }
3175 
3176 /**
3177   Implementation of the mutex instrumentation interface.
3178   @sa PSI_v1::unlock_mutex.
3179 */
unlock_mutex_v1(PSI_mutex * mutex)3180 static void unlock_mutex_v1(PSI_mutex *mutex)
3181 {
3182   PFS_mutex *pfs_mutex= reinterpret_cast<PFS_mutex*> (mutex);
3183 
3184   DBUG_ASSERT(pfs_mutex != NULL);
3185 
3186   /*
3187     Note that this code is still protected by the instrumented mutex,
3188     and therefore is thread safe. See inline_mysql_mutex_unlock().
3189   */
3190 
3191   /* Always update the instrumented state */
3192   pfs_mutex->m_owner= NULL;
3193   pfs_mutex->m_last_locked= 0;
3194 
3195 #ifdef LATER_WL2333
3196   /*
3197     See WL#2333: SHOW ENGINE ... LOCK STATUS.
3198     PFS_mutex::m_lock_stat is not exposed in user visible tables
3199     currently, so there is no point spending time computing it.
3200   */
3201   if (! pfs_mutex->m_enabled)
3202     return;
3203 
3204   if (! pfs_mutex->m_timed)
3205     return;
3206 
3207   ulonglong locked_time;
3208   locked_time= get_timer_pico_value(wait_timer) - pfs_mutex->m_last_locked;
3209   pfs_mutex->m_mutex_stat.m_lock_stat.aggregate_value(locked_time);
3210 #endif
3211 }
3212 
3213 /**
3214   Implementation of the rwlock instrumentation interface.
3215   @sa PSI_v1::unlock_rwlock.
3216 */
unlock_rwlock_v1(PSI_rwlock * rwlock)3217 static void unlock_rwlock_v1(PSI_rwlock *rwlock)
3218 {
3219   PFS_rwlock *pfs_rwlock= reinterpret_cast<PFS_rwlock*> (rwlock);
3220   DBUG_ASSERT(pfs_rwlock != NULL);
3221   DBUG_ASSERT(pfs_rwlock == sanitize_rwlock(pfs_rwlock));
3222   DBUG_ASSERT(pfs_rwlock->m_class != NULL);
3223   DBUG_ASSERT(pfs_rwlock->m_lock.is_populated());
3224 
3225   bool last_writer= false;
3226   bool last_reader= false;
3227 
3228   /*
3229     Note that this code is still protected by the instrumented rwlock,
3230     and therefore is:
3231     - thread safe for write locks
3232     - almost thread safe for read locks (pfs_rwlock->m_readers is unsafe).
3233     See inline_mysql_rwlock_unlock()
3234   */
3235 
3236   /* Always update the instrumented state */
3237   if (pfs_rwlock->m_writer != NULL)
3238   {
3239     /* Nominal case, a writer is unlocking. */
3240     last_writer= true;
3241     pfs_rwlock->m_writer= NULL;
3242     /* Reset the readers stats, they could be off */
3243     pfs_rwlock->m_readers= 0;
3244   }
3245   else if (likely(pfs_rwlock->m_readers > 0))
3246   {
3247     /* Nominal case, a reader is unlocking. */
3248     if (--(pfs_rwlock->m_readers) == 0)
3249       last_reader= true;
3250   }
3251   else
3252   {
3253     /*
3254       Edge case, we have no writer and no readers,
3255       on an unlock event.
3256       This is possible for:
3257       - partial instrumentation
3258       - instrumentation disabled at runtime,
3259         see when get_thread_rwlock_locker_v1() returns NULL
3260       No further action is taken here, the next
3261       write lock will put the statistics is a valid state.
3262     */
3263   }
3264 
3265 #ifdef LATER_WL2333
3266   /* See WL#2333: SHOW ENGINE ... LOCK STATUS. */
3267 
3268   if (! pfs_rwlock->m_enabled)
3269     return;
3270 
3271   if (! pfs_rwlock->m_timed)
3272     return;
3273 
3274   ulonglong locked_time;
3275   if (last_writer)
3276   {
3277     locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_written;
3278     pfs_rwlock->m_rwlock_stat.m_write_lock_stat.aggregate_value(locked_time);
3279   }
3280   else if (last_reader)
3281   {
3282     locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_read;
3283     pfs_rwlock->m_rwlock_stat.m_read_lock_stat.aggregate_value(locked_time);
3284   }
3285 #else
3286   (void) last_reader;
3287   (void) last_writer;
3288 #endif
3289 }
3290 
3291 /**
3292   Implementation of the cond instrumentation interface.
3293   @sa PSI_v1::signal_cond.
3294 */
signal_cond_v1(PSI_cond * cond)3295 static void signal_cond_v1(PSI_cond* cond)
3296 {
3297   PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
3298 
3299   DBUG_ASSERT(pfs_cond != NULL);
3300 
3301   pfs_cond->m_cond_stat.m_signal_count++;
3302 }
3303 
3304 /**
3305   Implementation of the cond instrumentation interface.
3306   @sa PSI_v1::broadcast_cond.
3307 */
broadcast_cond_v1(PSI_cond * cond)3308 static void broadcast_cond_v1(PSI_cond* cond)
3309 {
3310   PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
3311 
3312   DBUG_ASSERT(pfs_cond != NULL);
3313 
3314   pfs_cond->m_cond_stat.m_broadcast_count++;
3315 }
3316 
3317 /**
3318   Implementation of the idle instrumentation interface.
3319   @sa PSI_v1::start_idle_wait.
3320 */
3321 static PSI_idle_locker*
start_idle_wait_v1(PSI_idle_locker_state * state,const char * src_file,uint src_line)3322 start_idle_wait_v1(PSI_idle_locker_state* state, const char *src_file, uint src_line)
3323 {
3324   DBUG_ASSERT(state != NULL);
3325 
3326   if (psi_unlikely(! flag_global_instrumentation))
3327     return NULL;
3328 
3329   if (!global_idle_class.m_enabled)
3330     return NULL;
3331 
3332   uint flags= 0;
3333   ulonglong timer_start= 0;
3334 
3335   if (flag_thread_instrumentation)
3336   {
3337     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
3338     if (unlikely(pfs_thread == NULL))
3339       return NULL;
3340     if (!pfs_thread->m_enabled)
3341       return NULL;
3342     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
3343     flags= STATE_FLAG_THREAD;
3344 
3345     DBUG_ASSERT(pfs_thread->m_events_statements_count == 0);
3346 
3347     if (global_idle_class.m_timed)
3348     {
3349       timer_start= get_timer_raw_value_and_function(idle_timer, &state->m_timer);
3350       state->m_timer_start= timer_start;
3351       flags|= STATE_FLAG_TIMED;
3352     }
3353 
3354     if (flag_events_waits_current)
3355     {
3356       if (unlikely(pfs_thread->m_events_waits_current >=
3357                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
3358       {
3359         locker_lost++;
3360         return NULL;
3361       }
3362       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
3363       state->m_wait= wait;
3364       flags|= STATE_FLAG_EVENT;
3365 
3366       wait->m_event_type= EVENT_TYPE_WAIT;
3367       /*
3368         IDLE events are waits, but by definition we know that
3369         such waits happen outside of any STAGE and STATEMENT,
3370         so they have no parents.
3371       */
3372       wait->m_nesting_event_id= 0;
3373       /* no need to set wait->m_nesting_event_type */
3374 
3375       wait->m_thread= pfs_thread;
3376       wait->m_class= &global_idle_class;
3377       wait->m_timer_start= timer_start;
3378       wait->m_timer_end= 0;
3379       wait->m_event_id= pfs_thread->m_event_id++;
3380       wait->m_end_event_id= 0;
3381       wait->m_operation= OPERATION_TYPE_IDLE;
3382       wait->m_source_file= src_file;
3383       wait->m_source_line= src_line;
3384       wait->m_wait_class= WAIT_CLASS_IDLE;
3385 
3386       pfs_thread->m_events_waits_current++;
3387     }
3388   }
3389   else
3390   {
3391     if (global_idle_class.m_timed)
3392     {
3393       timer_start= get_timer_raw_value_and_function(idle_timer, &state->m_timer);
3394       state->m_timer_start= timer_start;
3395       flags= STATE_FLAG_TIMED;
3396     }
3397   }
3398 
3399   state->m_flags= flags;
3400   return reinterpret_cast<PSI_idle_locker*> (state);
3401 }
3402 
3403 /**
3404   Implementation of the mutex instrumentation interface.
3405   @sa PSI_v1::end_idle_wait.
3406 */
end_idle_wait_v1(PSI_idle_locker * locker)3407 static void end_idle_wait_v1(PSI_idle_locker* locker)
3408 {
3409   PSI_idle_locker_state *state= reinterpret_cast<PSI_idle_locker_state*> (locker);
3410   DBUG_ASSERT(state != NULL);
3411   ulonglong timer_end= 0;
3412   ulonglong wait_time= 0;
3413 
3414   uint flags= state->m_flags;
3415 
3416   if (flags & STATE_FLAG_TIMED)
3417   {
3418     timer_end= state->m_timer();
3419     wait_time= timer_end - state->m_timer_start;
3420   }
3421 
3422   if (flags & STATE_FLAG_THREAD)
3423   {
3424     PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3425     PFS_single_stat *event_name_array;
3426     event_name_array= thread->m_instr_class_waits_stats;
3427 
3428     if (flags & STATE_FLAG_TIMED)
3429     {
3430       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3431       event_name_array[GLOBAL_IDLE_EVENT_INDEX].aggregate_value(wait_time);
3432     }
3433     else
3434     {
3435       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3436       event_name_array[GLOBAL_IDLE_EVENT_INDEX].aggregate_counted();
3437     }
3438 
3439     if (flags & STATE_FLAG_EVENT)
3440     {
3441       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3442       DBUG_ASSERT(wait != NULL);
3443 
3444       wait->m_timer_end= timer_end;
3445       wait->m_end_event_id= thread->m_event_id;
3446       if (flag_events_waits_history)
3447         insert_events_waits_history(thread, wait);
3448       if (flag_events_waits_history_long)
3449         insert_events_waits_history_long(wait);
3450       thread->m_events_waits_current--;
3451 
3452       DBUG_ASSERT(wait == thread->m_events_waits_current);
3453     }
3454   }
3455 
3456   if (flags & STATE_FLAG_TIMED)
3457   {
3458     /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME (timed) */
3459     global_idle_stat.aggregate_value(wait_time);
3460   }
3461   else
3462   {
3463     /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME (counted) */
3464     global_idle_stat.aggregate_counted();
3465   }
3466 }
3467 
3468 /**
3469   Implementation of the mutex instrumentation interface.
3470   @sa PSI_v1::end_mutex_wait.
3471 */
end_mutex_wait_v1(PSI_mutex_locker * locker,int rc)3472 static void end_mutex_wait_v1(PSI_mutex_locker* locker, int rc)
3473 {
3474   PSI_mutex_locker_state *state= reinterpret_cast<PSI_mutex_locker_state*> (locker);
3475   DBUG_ASSERT(state != NULL);
3476 
3477   ulonglong timer_end= 0;
3478   ulonglong wait_time= 0;
3479 
3480   PFS_mutex *mutex= reinterpret_cast<PFS_mutex *> (state->m_mutex);
3481   DBUG_ASSERT(mutex != NULL);
3482   PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3483 
3484   uint flags= state->m_flags;
3485 
3486   if (flags & STATE_FLAG_TIMED)
3487   {
3488     timer_end= state->m_timer();
3489     wait_time= timer_end - state->m_timer_start;
3490     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
3491     mutex->m_mutex_stat.m_wait_stat.aggregate_value(wait_time);
3492   }
3493   else
3494   {
3495     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
3496     mutex->m_mutex_stat.m_wait_stat.aggregate_counted();
3497   }
3498 
3499   if (likely(rc == 0))
3500   {
3501     mutex->m_owner= thread;
3502     mutex->m_last_locked= timer_end;
3503   }
3504 
3505   if (flags & STATE_FLAG_THREAD)
3506   {
3507     PFS_single_stat *event_name_array;
3508     event_name_array= thread->m_instr_class_waits_stats;
3509     uint index= mutex->m_class->m_event_name_index;
3510 
3511     if (flags & STATE_FLAG_TIMED)
3512     {
3513       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3514       event_name_array[index].aggregate_value(wait_time);
3515     }
3516     else
3517     {
3518       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3519       event_name_array[index].aggregate_counted();
3520     }
3521 
3522     if (flags & STATE_FLAG_EVENT)
3523     {
3524       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3525       DBUG_ASSERT(wait != NULL);
3526 
3527       wait->m_timer_end= timer_end;
3528       wait->m_end_event_id= thread->m_event_id;
3529       if (flag_events_waits_history)
3530         insert_events_waits_history(thread, wait);
3531       if (flag_events_waits_history_long)
3532         insert_events_waits_history_long(wait);
3533       thread->m_events_waits_current--;
3534 
3535       DBUG_ASSERT(wait == thread->m_events_waits_current);
3536     }
3537   }
3538 }
3539 
3540 /**
3541   Implementation of the rwlock instrumentation interface.
3542   @sa PSI_v1::end_rwlock_rdwait.
3543 */
end_rwlock_rdwait_v1(PSI_rwlock_locker * locker,int rc)3544 static void end_rwlock_rdwait_v1(PSI_rwlock_locker* locker, int rc)
3545 {
3546   PSI_rwlock_locker_state *state= reinterpret_cast<PSI_rwlock_locker_state*> (locker);
3547   DBUG_ASSERT(state != NULL);
3548 
3549   ulonglong timer_end= 0;
3550   ulonglong wait_time= 0;
3551 
3552   PFS_rwlock *rwlock= reinterpret_cast<PFS_rwlock *> (state->m_rwlock);
3553   DBUG_ASSERT(rwlock != NULL);
3554 
3555   if (state->m_flags & STATE_FLAG_TIMED)
3556   {
3557     timer_end= state->m_timer();
3558     wait_time= timer_end - state->m_timer_start;
3559     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
3560     rwlock->m_rwlock_stat.m_wait_stat.aggregate_value(wait_time);
3561   }
3562   else
3563   {
3564     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
3565     rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
3566   }
3567 
3568   if (rc == 0)
3569   {
3570     /*
3571       Warning:
3572       Multiple threads can execute this section concurrently
3573       (since multiple readers can execute in parallel).
3574       The statistics generated are not safe, which is why they are
3575       just statistics, not facts.
3576     */
3577     if (rwlock->m_readers == 0)
3578       rwlock->m_last_read= timer_end;
3579     rwlock->m_writer= NULL;
3580     rwlock->m_readers++;
3581   }
3582 
3583   if (state->m_flags & STATE_FLAG_THREAD)
3584   {
3585     PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3586     DBUG_ASSERT(thread != NULL);
3587 
3588     PFS_single_stat *event_name_array;
3589     event_name_array= thread->m_instr_class_waits_stats;
3590     uint index= rwlock->m_class->m_event_name_index;
3591 
3592     if (state->m_flags & STATE_FLAG_TIMED)
3593     {
3594       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3595       event_name_array[index].aggregate_value(wait_time);
3596     }
3597     else
3598     {
3599       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3600       event_name_array[index].aggregate_counted();
3601     }
3602 
3603     if (state->m_flags & STATE_FLAG_EVENT)
3604     {
3605       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3606       DBUG_ASSERT(wait != NULL);
3607 
3608       wait->m_timer_end= timer_end;
3609       wait->m_end_event_id= thread->m_event_id;
3610       if (flag_events_waits_history)
3611         insert_events_waits_history(thread, wait);
3612       if (flag_events_waits_history_long)
3613         insert_events_waits_history_long(wait);
3614       thread->m_events_waits_current--;
3615 
3616       DBUG_ASSERT(wait == thread->m_events_waits_current);
3617     }
3618   }
3619 }
3620 
3621 /**
3622   Implementation of the rwlock instrumentation interface.
3623   @sa PSI_v1::end_rwlock_wrwait.
3624 */
end_rwlock_wrwait_v1(PSI_rwlock_locker * locker,int rc)3625 static void end_rwlock_wrwait_v1(PSI_rwlock_locker* locker, int rc)
3626 {
3627   PSI_rwlock_locker_state *state= reinterpret_cast<PSI_rwlock_locker_state*> (locker);
3628   DBUG_ASSERT(state != NULL);
3629 
3630   ulonglong timer_end= 0;
3631   ulonglong wait_time= 0;
3632 
3633   PFS_rwlock *rwlock= reinterpret_cast<PFS_rwlock *> (state->m_rwlock);
3634   DBUG_ASSERT(rwlock != NULL);
3635   PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3636 
3637   if (state->m_flags & STATE_FLAG_TIMED)
3638   {
3639     timer_end= state->m_timer();
3640     wait_time= timer_end - state->m_timer_start;
3641     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
3642     rwlock->m_rwlock_stat.m_wait_stat.aggregate_value(wait_time);
3643   }
3644   else
3645   {
3646     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
3647     rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
3648   }
3649 
3650   if (likely(rc == 0))
3651   {
3652     /* Thread safe : we are protected by the instrumented rwlock */
3653     rwlock->m_writer= thread;
3654     rwlock->m_last_written= timer_end;
3655     /* Reset the readers stats, they could be off */
3656     rwlock->m_readers= 0;
3657     rwlock->m_last_read= 0;
3658   }
3659 
3660   if (state->m_flags & STATE_FLAG_THREAD)
3661   {
3662     PFS_single_stat *event_name_array;
3663     event_name_array= thread->m_instr_class_waits_stats;
3664     uint index= rwlock->m_class->m_event_name_index;
3665 
3666     if (state->m_flags & STATE_FLAG_TIMED)
3667     {
3668       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3669       event_name_array[index].aggregate_value(wait_time);
3670     }
3671     else
3672     {
3673       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3674       event_name_array[index].aggregate_counted();
3675     }
3676 
3677     if (state->m_flags & STATE_FLAG_EVENT)
3678     {
3679       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3680       DBUG_ASSERT(wait != NULL);
3681 
3682       wait->m_timer_end= timer_end;
3683       wait->m_end_event_id= thread->m_event_id;
3684       if (flag_events_waits_history)
3685         insert_events_waits_history(thread, wait);
3686       if (flag_events_waits_history_long)
3687         insert_events_waits_history_long(wait);
3688       thread->m_events_waits_current--;
3689 
3690       DBUG_ASSERT(wait == thread->m_events_waits_current);
3691     }
3692   }
3693 }
3694 
3695 /**
3696   Implementation of the cond instrumentation interface.
3697   @sa PSI_v1::end_cond_wait.
3698 */
end_cond_wait_v1(PSI_cond_locker * locker,int rc)3699 static void end_cond_wait_v1(PSI_cond_locker* locker, int rc)
3700 {
3701   PSI_cond_locker_state *state= reinterpret_cast<PSI_cond_locker_state*> (locker);
3702   DBUG_ASSERT(state != NULL);
3703 
3704   ulonglong timer_end= 0;
3705   ulonglong wait_time= 0;
3706 
3707   PFS_cond *cond= reinterpret_cast<PFS_cond *> (state->m_cond);
3708   /* PFS_mutex *mutex= reinterpret_cast<PFS_mutex *> (state->m_mutex); */
3709 
3710   if (state->m_flags & STATE_FLAG_TIMED)
3711   {
3712     timer_end= state->m_timer();
3713     wait_time= timer_end - state->m_timer_start;
3714     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
3715     cond->m_cond_stat.m_wait_stat.aggregate_value(wait_time);
3716   }
3717   else
3718   {
3719     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
3720     cond->m_cond_stat.m_wait_stat.aggregate_counted();
3721   }
3722 
3723   if (state->m_flags & STATE_FLAG_THREAD)
3724   {
3725     PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3726     DBUG_ASSERT(thread != NULL);
3727 
3728     PFS_single_stat *event_name_array;
3729     event_name_array= thread->m_instr_class_waits_stats;
3730     uint index= cond->m_class->m_event_name_index;
3731 
3732     if (state->m_flags & STATE_FLAG_TIMED)
3733     {
3734       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3735       event_name_array[index].aggregate_value(wait_time);
3736     }
3737     else
3738     {
3739       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3740       event_name_array[index].aggregate_counted();
3741     }
3742 
3743     if (state->m_flags & STATE_FLAG_EVENT)
3744     {
3745       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3746       DBUG_ASSERT(wait != NULL);
3747 
3748       wait->m_timer_end= timer_end;
3749       wait->m_end_event_id= thread->m_event_id;
3750       if (flag_events_waits_history)
3751         insert_events_waits_history(thread, wait);
3752       if (flag_events_waits_history_long)
3753         insert_events_waits_history_long(wait);
3754       thread->m_events_waits_current--;
3755 
3756       DBUG_ASSERT(wait == thread->m_events_waits_current);
3757     }
3758   }
3759 }
3760 
3761 /**
3762   Implementation of the table instrumentation interface.
3763   @sa PSI_v1::end_table_io_wait.
3764 */
end_table_io_wait_v1(PSI_table_locker * locker)3765 static void end_table_io_wait_v1(PSI_table_locker* locker)
3766 {
3767   PSI_table_locker_state *state= reinterpret_cast<PSI_table_locker_state*> (locker);
3768   DBUG_ASSERT(state != NULL);
3769 
3770   ulonglong timer_end= 0;
3771   ulonglong wait_time= 0;
3772 
3773   PFS_table *table= reinterpret_cast<PFS_table *> (state->m_table);
3774   DBUG_ASSERT(table != NULL);
3775 
3776   PFS_single_stat *stat;
3777   PFS_table_io_stat *table_io_stat;
3778 
3779   DBUG_ASSERT((state->m_index < table->m_share->m_key_count) ||
3780               (state->m_index == MAX_INDEXES));
3781 
3782   table_io_stat= & table->m_table_stat.m_index_stat[state->m_index];
3783   table_io_stat->m_has_data= true;
3784 
3785   switch (state->m_io_operation)
3786   {
3787   case PSI_TABLE_FETCH_ROW:
3788     stat= & table_io_stat->m_fetch;
3789     break;
3790   case PSI_TABLE_WRITE_ROW:
3791     stat= & table_io_stat->m_insert;
3792     break;
3793   case PSI_TABLE_UPDATE_ROW:
3794     stat= & table_io_stat->m_update;
3795     break;
3796   case PSI_TABLE_DELETE_ROW:
3797     stat= & table_io_stat->m_delete;
3798     break;
3799   default:
3800     DBUG_ASSERT(false);
3801     stat= NULL;
3802     break;
3803   }
3804 
3805   uint flags= state->m_flags;
3806 
3807   if (flags & STATE_FLAG_TIMED)
3808   {
3809     timer_end= state->m_timer();
3810     wait_time= timer_end - state->m_timer_start;
3811     stat->aggregate_value(wait_time);
3812   }
3813   else
3814   {
3815     stat->aggregate_counted();
3816   }
3817 
3818   if (flags & STATE_FLAG_THREAD)
3819   {
3820     PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3821     DBUG_ASSERT(thread != NULL);
3822 
3823     PFS_single_stat *event_name_array;
3824     event_name_array= thread->m_instr_class_waits_stats;
3825 
3826     /*
3827       Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
3828       (for wait/io/table/sql/handler)
3829     */
3830     if (flags & STATE_FLAG_TIMED)
3831     {
3832       event_name_array[GLOBAL_TABLE_IO_EVENT_INDEX].aggregate_value(wait_time);
3833     }
3834     else
3835     {
3836       event_name_array[GLOBAL_TABLE_IO_EVENT_INDEX].aggregate_counted();
3837     }
3838 
3839     if (flags & STATE_FLAG_EVENT)
3840     {
3841       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3842       DBUG_ASSERT(wait != NULL);
3843 
3844       wait->m_timer_end= timer_end;
3845       wait->m_end_event_id= thread->m_event_id;
3846       if (flag_events_waits_history)
3847         insert_events_waits_history(thread, wait);
3848       if (flag_events_waits_history_long)
3849         insert_events_waits_history_long(wait);
3850       thread->m_events_waits_current--;
3851 
3852       DBUG_ASSERT(wait == thread->m_events_waits_current);
3853     }
3854   }
3855 
3856   table->m_has_io_stats= true;
3857 }
3858 
3859 /**
3860   Implementation of the table instrumentation interface.
3861   @sa PSI_v1::end_table_lock_wait.
3862 */
end_table_lock_wait_v1(PSI_table_locker * locker)3863 static void end_table_lock_wait_v1(PSI_table_locker* locker)
3864 {
3865   PSI_table_locker_state *state= reinterpret_cast<PSI_table_locker_state*> (locker);
3866   DBUG_ASSERT(state != NULL);
3867 
3868   ulonglong timer_end= 0;
3869   ulonglong wait_time= 0;
3870 
3871   PFS_table *table= reinterpret_cast<PFS_table *> (state->m_table);
3872   DBUG_ASSERT(table != NULL);
3873 
3874   PFS_single_stat *stat= & table->m_table_stat.m_lock_stat.m_stat[state->m_index];
3875 
3876   uint flags= state->m_flags;
3877 
3878   if (flags & STATE_FLAG_TIMED)
3879   {
3880     timer_end= state->m_timer();
3881     wait_time= timer_end - state->m_timer_start;
3882     stat->aggregate_value(wait_time);
3883   }
3884   else
3885   {
3886     stat->aggregate_counted();
3887   }
3888 
3889   if (flags & STATE_FLAG_THREAD)
3890   {
3891     PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3892     DBUG_ASSERT(thread != NULL);
3893 
3894     PFS_single_stat *event_name_array;
3895     event_name_array= thread->m_instr_class_waits_stats;
3896 
3897     /*
3898       Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
3899       (for wait/lock/table/sql/handler)
3900     */
3901     if (flags & STATE_FLAG_TIMED)
3902     {
3903       event_name_array[GLOBAL_TABLE_LOCK_EVENT_INDEX].aggregate_value(wait_time);
3904     }
3905     else
3906     {
3907       event_name_array[GLOBAL_TABLE_LOCK_EVENT_INDEX].aggregate_counted();
3908     }
3909 
3910     if (flags & STATE_FLAG_EVENT)
3911     {
3912       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3913       DBUG_ASSERT(wait != NULL);
3914 
3915       wait->m_timer_end= timer_end;
3916       wait->m_end_event_id= thread->m_event_id;
3917       if (flag_events_waits_history)
3918         insert_events_waits_history(thread, wait);
3919       if (flag_events_waits_history_long)
3920         insert_events_waits_history_long(wait);
3921       thread->m_events_waits_current--;
3922 
3923       DBUG_ASSERT(wait == thread->m_events_waits_current);
3924     }
3925   }
3926 
3927   table->m_has_lock_stats= true;
3928 }
3929 
/*
  Forward declarations: the file open and close wrappers that follow call
  these two functions before their definitions appear in this file.
*/
static void start_file_wait_v1(PSI_file_locker *locker,
                               size_t count,
                               const char *src_file,
                               uint src_line);

static void end_file_wait_v1(PSI_file_locker *locker,
                             size_t count);
3937 
3938 /**
3939   Implementation of the file instrumentation interface.
3940   @sa PSI_v1::start_file_open_wait.
3941 */
start_file_open_wait_v1(PSI_file_locker * locker,const char * src_file,uint src_line)3942 static void start_file_open_wait_v1(PSI_file_locker *locker,
3943                                     const char *src_file,
3944                                     uint src_line)
3945 {
3946   start_file_wait_v1(locker, 0, src_file, src_line);
3947 
3948   return;
3949 }
3950 
3951 /**
3952   Implementation of the file instrumentation interface.
3953   @sa PSI_v1::end_file_open_wait.
3954 */
end_file_open_wait_v1(PSI_file_locker * locker,void * result)3955 static PSI_file* end_file_open_wait_v1(PSI_file_locker *locker,
3956                                        void *result)
3957 {
3958   PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
3959   DBUG_ASSERT(state != NULL);
3960 
3961   switch (state->m_operation)
3962   {
3963   case PSI_FILE_STAT:
3964   case PSI_FILE_RENAME:
3965     break;
3966   case PSI_FILE_STREAM_OPEN:
3967   case PSI_FILE_CREATE:
3968   case PSI_FILE_OPEN:
3969     if (result != NULL)
3970     {
3971       PFS_file_class *klass= reinterpret_cast<PFS_file_class*> (state->m_class);
3972       PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
3973       const char *name= state->m_name;
3974       uint len= (uint)strlen(name);
3975       PFS_file *pfs_file= find_or_create_file(thread, klass, name, len, true);
3976       state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
3977     }
3978     break;
3979   default:
3980     DBUG_ASSERT(false);
3981     break;
3982   }
3983 
3984   end_file_wait_v1(locker, 0);
3985 
3986   return state->m_file;
3987 }
3988 
3989 /**
3990   Implementation of the file instrumentation interface.
3991   @sa PSI_v1::end_file_open_wait_and_bind_to_descriptor.
3992 */
end_file_open_wait_and_bind_to_descriptor_v1(PSI_file_locker * locker,File file)3993 static void end_file_open_wait_and_bind_to_descriptor_v1
3994   (PSI_file_locker *locker, File file)
3995 {
3996   PFS_file *pfs_file= NULL;
3997   int index= (int) file;
3998   PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
3999   DBUG_ASSERT(state != NULL);
4000 
4001   if (index >= 0)
4002   {
4003     PFS_file_class *klass= reinterpret_cast<PFS_file_class*> (state->m_class);
4004     PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
4005     const char *name= state->m_name;
4006     uint len= (uint)strlen(name);
4007     pfs_file= find_or_create_file(thread, klass, name, len, true);
4008     state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
4009   }
4010 
4011   end_file_wait_v1(locker, 0);
4012 
4013   if (likely(index >= 0))
4014   {
4015     if (likely(index < file_handle_max))
4016       file_handle_array[index]= pfs_file;
4017     else
4018     {
4019       if (pfs_file != NULL)
4020         release_file(pfs_file);
4021       file_handle_lost++;
4022     }
4023   }
4024 }
4025 
4026 /**
4027   Implementation of the file instrumentation interface.
4028   @sa PSI_v1::start_file_wait.
4029 */
start_file_wait_v1(PSI_file_locker * locker,size_t count,const char * src_file,uint src_line)4030 static void start_file_wait_v1(PSI_file_locker *locker,
4031                                size_t count,
4032                                const char *src_file,
4033                                uint src_line)
4034 {
4035   ulonglong timer_start= 0;
4036   PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4037   DBUG_ASSERT(state != NULL);
4038 
4039   uint flags= state->m_flags;
4040 
4041   if (flags & STATE_FLAG_TIMED)
4042   {
4043     timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
4044     state->m_timer_start= timer_start;
4045   }
4046 
4047   if (flags & STATE_FLAG_EVENT)
4048   {
4049     PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
4050     DBUG_ASSERT(wait != NULL);
4051 
4052     wait->m_timer_start= timer_start;
4053     wait->m_source_file= src_file;
4054     wait->m_source_line= src_line;
4055     wait->m_number_of_bytes= count;
4056   }
4057 }
4058 
4059 /**
4060   Implementation of the file instrumentation interface.
4061   @sa PSI_v1::end_file_wait.
4062 */
end_file_wait_v1(PSI_file_locker * locker,size_t byte_count)4063 static void end_file_wait_v1(PSI_file_locker *locker,
4064                              size_t byte_count)
4065 {
4066   PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4067   DBUG_ASSERT(state != NULL);
4068   PFS_file *file= reinterpret_cast<PFS_file *> (state->m_file);
4069   PFS_file_class *klass= reinterpret_cast<PFS_file_class *> (state->m_class);
4070   PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
4071 
4072   ulonglong timer_end= 0;
4073   ulonglong wait_time= 0;
4074   PFS_byte_stat *byte_stat;
4075   uint flags= state->m_flags;
4076   size_t bytes= ((int)byte_count > -1 ? byte_count : 0);
4077 
4078   PFS_file_stat *file_stat;
4079 
4080   if (file != NULL)
4081   {
4082     file_stat= & file->m_file_stat;
4083   }
4084   else
4085   {
4086     file_stat= & klass->m_file_stat;
4087   }
4088 
4089   switch (state->m_operation)
4090   {
4091     /* Group read operations */
4092     case PSI_FILE_READ:
4093       byte_stat= &file_stat->m_io_stat.m_read;
4094       break;
4095     /* Group write operations */
4096     case PSI_FILE_WRITE:
4097       byte_stat= &file_stat->m_io_stat.m_write;
4098       break;
4099     /* Group remaining operations as miscellaneous */
4100     case PSI_FILE_CREATE:
4101     case PSI_FILE_CREATE_TMP:
4102     case PSI_FILE_OPEN:
4103     case PSI_FILE_STREAM_OPEN:
4104     case PSI_FILE_STREAM_CLOSE:
4105     case PSI_FILE_SEEK:
4106     case PSI_FILE_TELL:
4107     case PSI_FILE_FLUSH:
4108     case PSI_FILE_FSTAT:
4109     case PSI_FILE_CHSIZE:
4110     case PSI_FILE_DELETE:
4111     case PSI_FILE_RENAME:
4112     case PSI_FILE_SYNC:
4113     case PSI_FILE_STAT:
4114     case PSI_FILE_CLOSE:
4115       byte_stat= &file_stat->m_io_stat.m_misc;
4116       break;
4117     default:
4118       DBUG_ASSERT(false);
4119       byte_stat= NULL;
4120       break;
4121   }
4122 
4123   /* Aggregation for EVENTS_WAITS_SUMMARY_BY_INSTANCE */
4124   if (flags & STATE_FLAG_TIMED)
4125   {
4126     timer_end= state->m_timer();
4127     wait_time= timer_end - state->m_timer_start;
4128     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
4129     byte_stat->aggregate(wait_time, bytes);
4130   }
4131   else
4132   {
4133     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
4134     byte_stat->aggregate_counted(bytes);
4135   }
4136 
4137   if (flags & STATE_FLAG_THREAD)
4138   {
4139     DBUG_ASSERT(thread != NULL);
4140 
4141     PFS_single_stat *event_name_array;
4142     event_name_array= thread->m_instr_class_waits_stats;
4143     uint index= klass->m_event_name_index;
4144 
4145     if (flags & STATE_FLAG_TIMED)
4146     {
4147       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
4148       event_name_array[index].aggregate_value(wait_time);
4149     }
4150     else
4151     {
4152       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
4153       event_name_array[index].aggregate_counted();
4154     }
4155 
4156     if (state->m_flags & STATE_FLAG_EVENT)
4157     {
4158       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
4159       DBUG_ASSERT(wait != NULL);
4160 
4161       wait->m_timer_end= timer_end;
4162       wait->m_number_of_bytes= bytes;
4163       wait->m_end_event_id= thread->m_event_id;
4164       wait->m_object_instance_addr= file;
4165       wait->m_weak_file= file;
4166       wait->m_weak_version= (file ? file->get_version() : 0);
4167 
4168       if (flag_events_waits_history)
4169         insert_events_waits_history(thread, wait);
4170       if (flag_events_waits_history_long)
4171         insert_events_waits_history_long(wait);
4172       thread->m_events_waits_current--;
4173 
4174       DBUG_ASSERT(wait == thread->m_events_waits_current);
4175     }
4176   }
4177 }
4178 
4179 /**
4180   Implementation of the file instrumentation interface.
4181   @sa PSI_v1::start_file_close_wait.
4182 */
start_file_close_wait_v1(PSI_file_locker * locker,const char * src_file,uint src_line)4183 static void start_file_close_wait_v1(PSI_file_locker *locker,
4184                                      const char *src_file,
4185                                      uint src_line)
4186 {
4187   PFS_thread *thread;
4188   const char *name;
4189   uint len;
4190   PFS_file *pfs_file;
4191   PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4192   DBUG_ASSERT(state != NULL);
4193 
4194   switch (state->m_operation)
4195   {
4196   case PSI_FILE_DELETE:
4197     thread= reinterpret_cast<PFS_thread*> (state->m_thread);
4198     name= state->m_name;
4199     len= (uint)strlen(name);
4200     pfs_file= find_or_create_file(thread, NULL, name, len, false);
4201     state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
4202     break;
4203   case PSI_FILE_STREAM_CLOSE:
4204   case PSI_FILE_CLOSE:
4205     break;
4206   default:
4207     DBUG_ASSERT(false);
4208     break;
4209   }
4210 
4211   start_file_wait_v1(locker, 0, src_file, src_line);
4212 
4213   return;
4214 }
4215 
4216 /**
4217   Implementation of the file instrumentation interface.
4218   @sa PSI_v1::end_file_close_wait.
4219 */
end_file_close_wait_v1(PSI_file_locker * locker,int rc)4220 static void end_file_close_wait_v1(PSI_file_locker *locker, int rc)
4221 {
4222   PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4223   DBUG_ASSERT(state != NULL);
4224 
4225   end_file_wait_v1(locker, 0);
4226 
4227   if (rc == 0)
4228   {
4229     PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
4230     PFS_file *file= reinterpret_cast<PFS_file*> (state->m_file);
4231 
4232     /* Release or destroy the file if necessary */
4233     switch(state->m_operation)
4234     {
4235     case PSI_FILE_CLOSE:
4236     case PSI_FILE_STREAM_CLOSE:
4237       if (file != NULL)
4238         release_file(file);
4239       break;
4240     case PSI_FILE_DELETE:
4241       if (file != NULL)
4242         destroy_file(thread, file);
4243       break;
4244     default:
4245       DBUG_ASSERT(false);
4246       break;
4247     }
4248   }
4249   return;
4250 }
4251 
start_stage_v1(PSI_stage_key key,const char * src_file,int src_line)4252 static void start_stage_v1(PSI_stage_key key, const char *src_file, int src_line)
4253 {
4254   ulonglong timer_value= 0;
4255 
4256   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4257   if (unlikely(pfs_thread == NULL))
4258     return;
4259 
4260   /* Always update column threads.processlist_state. */
4261   pfs_thread->m_stage= key;
4262 
4263   if (psi_unlikely(! flag_global_instrumentation))
4264     return;
4265 
4266   if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
4267     return;
4268 
4269   PFS_events_stages *pfs= & pfs_thread->m_stage_current;
4270   PFS_events_waits *child_wait= & pfs_thread->m_events_waits_stack[0];
4271   PFS_events_statements *parent_statement= & pfs_thread->m_statement_stack[0];
4272 
4273   PFS_instr_class *old_class= pfs->m_class;
4274   if (old_class != NULL)
4275   {
4276     PFS_stage_stat *event_name_array;
4277     event_name_array= pfs_thread->m_instr_class_stages_stats;
4278     uint index= old_class->m_event_name_index;
4279 
4280     /* Finish old event */
4281     if (old_class->m_timed)
4282     {
4283       timer_value= get_timer_raw_value(stage_timer);;
4284       pfs->m_timer_end= timer_value;
4285 
4286       /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
4287       ulonglong stage_time= timer_value - pfs->m_timer_start;
4288       event_name_array[index].aggregate_value(stage_time);
4289     }
4290     else
4291     {
4292       /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
4293       event_name_array[index].aggregate_counted();
4294     }
4295 
4296     if (flag_events_stages_current)
4297     {
4298       pfs->m_end_event_id= pfs_thread->m_event_id;
4299       if (flag_events_stages_history)
4300         insert_events_stages_history(pfs_thread, pfs);
4301       if (flag_events_stages_history_long)
4302         insert_events_stages_history_long(pfs);
4303     }
4304 
4305     /* This stage event is now complete. */
4306     pfs->m_class= NULL;
4307 
4308     /* New waits will now be attached directly to the parent statement. */
4309     child_wait->m_event_id= parent_statement->m_event_id;
4310     child_wait->m_event_type= parent_statement->m_event_type;
4311     /* See below for new stages, that may overwrite this. */
4312   }
4313 
4314   /* Start new event */
4315 
4316   PFS_stage_class *new_klass= find_stage_class(key);
4317   if (unlikely(new_klass == NULL))
4318     return;
4319 
4320   if (! new_klass->m_enabled)
4321     return;
4322 
4323   pfs->m_class= new_klass;
4324   if (new_klass->m_timed)
4325   {
4326     /*
4327       Do not call the timer again if we have a
4328       TIMER_END for the previous stage already.
4329     */
4330     if (timer_value == 0)
4331       timer_value= get_timer_raw_value(stage_timer);
4332     pfs->m_timer_start= timer_value;
4333   }
4334   else
4335     pfs->m_timer_start= 0;
4336   pfs->m_timer_end= 0;
4337 
4338   if (flag_events_stages_current)
4339   {
4340     /* m_thread_internal_id is immutable and already set */
4341     DBUG_ASSERT(pfs->m_thread_internal_id == pfs_thread->m_thread_internal_id);
4342     pfs->m_event_id= pfs_thread->m_event_id++;
4343     pfs->m_end_event_id= 0;
4344     pfs->m_source_file= src_file;
4345     pfs->m_source_line= src_line;
4346 
4347     /* New wait events will have this new stage as parent. */
4348     child_wait->m_event_id= pfs->m_event_id;
4349     child_wait->m_event_type= EVENT_TYPE_STAGE;
4350   }
4351 }
4352 
end_stage_v1()4353 static void end_stage_v1()
4354 {
4355   ulonglong timer_value= 0;
4356 
4357   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4358   if (unlikely(pfs_thread == NULL))
4359     return;
4360 
4361   pfs_thread->m_stage= 0;
4362 
4363   if (psi_unlikely(! flag_global_instrumentation))
4364     return;
4365 
4366   if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
4367     return;
4368 
4369   PFS_events_stages *pfs= & pfs_thread->m_stage_current;
4370 
4371   PFS_instr_class *old_class= pfs->m_class;
4372   if (old_class != NULL)
4373   {
4374     PFS_stage_stat *event_name_array;
4375     event_name_array= pfs_thread->m_instr_class_stages_stats;
4376     uint index= old_class->m_event_name_index;
4377 
4378     /* Finish old event */
4379     if (old_class->m_timed)
4380     {
4381       timer_value= get_timer_raw_value(stage_timer);;
4382       pfs->m_timer_end= timer_value;
4383 
4384       /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
4385       ulonglong stage_time= timer_value - pfs->m_timer_start;
4386       event_name_array[index].aggregate_value(stage_time);
4387     }
4388     else
4389     {
4390       /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
4391       event_name_array[index].aggregate_counted();
4392     }
4393 
4394     if (flag_events_stages_current)
4395     {
4396       pfs->m_end_event_id= pfs_thread->m_event_id;
4397       if (flag_events_stages_history)
4398         insert_events_stages_history(pfs_thread, pfs);
4399       if (flag_events_stages_history_long)
4400         insert_events_stages_history_long(pfs);
4401     }
4402 
4403     /* New waits will now be attached directly to the parent statement. */
4404     PFS_events_waits *child_wait= & pfs_thread->m_events_waits_stack[0];
4405     PFS_events_statements *parent_statement= & pfs_thread->m_statement_stack[0];
4406     child_wait->m_event_id= parent_statement->m_event_id;
4407     child_wait->m_event_type= parent_statement->m_event_type;
4408 
4409     /* This stage is completed */
4410     pfs->m_class= NULL;
4411   }
4412 }
4413 
4414 static PSI_statement_locker*
get_thread_statement_locker_v1(PSI_statement_locker_state * state,PSI_statement_key key,const void * charset)4415 get_thread_statement_locker_v1(PSI_statement_locker_state *state,
4416                                PSI_statement_key key,
4417                                const void *charset)
4418 {
4419   DBUG_ASSERT(state != NULL);
4420   DBUG_ASSERT(charset != NULL);
4421 
4422   if (psi_unlikely(! flag_global_instrumentation))
4423     return NULL;
4424   PFS_statement_class *klass= find_statement_class(key);
4425   if (unlikely(klass == NULL))
4426     return NULL;
4427   if (! klass->m_enabled)
4428     return NULL;
4429 
4430   uint flags;
4431 
4432   if (flag_thread_instrumentation)
4433   {
4434     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4435     if (unlikely(pfs_thread == NULL))
4436       return NULL;
4437     if (! pfs_thread->m_enabled)
4438       return NULL;
4439     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
4440     flags= STATE_FLAG_THREAD;
4441 
4442     if (klass->m_timed)
4443       flags|= STATE_FLAG_TIMED;
4444 
4445     if (flag_events_statements_current)
4446     {
4447       ulonglong event_id= pfs_thread->m_event_id++;
4448 
4449       if (pfs_thread->m_events_statements_count >= statement_stack_max)
4450       {
4451         return NULL;
4452       }
4453 
4454       pfs_thread->m_stmt_lock.allocated_to_dirty();
4455       PFS_events_statements *pfs= & pfs_thread->m_statement_stack[pfs_thread->m_events_statements_count];
4456       /* m_thread_internal_id is immutable and already set */
4457       DBUG_ASSERT(pfs->m_thread_internal_id == pfs_thread->m_thread_internal_id);
4458       pfs->m_event_id= event_id;
4459       pfs->m_end_event_id= 0;
4460       pfs->m_class= klass;
4461       pfs->m_timer_start= 0;
4462       pfs->m_timer_end= 0;
4463       pfs->m_lock_time= 0;
4464       pfs->m_current_schema_name_length= 0;
4465       pfs->m_sqltext_length= 0;
4466       pfs->m_sqltext_truncated= false;
4467       pfs->m_sqltext_cs_number= system_charset_info->number; /* default */
4468 
4469       pfs->m_message_text[0]= '\0';
4470       pfs->m_sql_errno= 0;
4471       pfs->m_sqlstate[0]= '\0';
4472       pfs->m_error_count= 0;
4473       pfs->m_warning_count= 0;
4474       pfs->m_rows_affected= 0;
4475 
4476       pfs->m_rows_sent= 0;
4477       pfs->m_rows_examined= 0;
4478       pfs->m_created_tmp_disk_tables= 0;
4479       pfs->m_created_tmp_tables= 0;
4480       pfs->m_select_full_join= 0;
4481       pfs->m_select_full_range_join= 0;
4482       pfs->m_select_range= 0;
4483       pfs->m_select_range_check= 0;
4484       pfs->m_select_scan= 0;
4485       pfs->m_sort_merge_passes= 0;
4486       pfs->m_sort_range= 0;
4487       pfs->m_sort_rows= 0;
4488       pfs->m_sort_scan= 0;
4489       pfs->m_no_index_used= 0;
4490       pfs->m_no_good_index_used= 0;
4491       pfs->m_digest_storage.reset();
4492 
4493       /* New stages will have this statement as parent */
4494       PFS_events_stages *child_stage= & pfs_thread->m_stage_current;
4495       child_stage->m_nesting_event_id= event_id;
4496       child_stage->m_nesting_event_type= EVENT_TYPE_STATEMENT;
4497 
4498       /* New waits will have this statement as parent, if no stage is instrumented */
4499       PFS_events_waits *child_wait= & pfs_thread->m_events_waits_stack[0];
4500       child_wait->m_nesting_event_id= event_id;
4501       child_wait->m_nesting_event_type= EVENT_TYPE_STATEMENT;
4502 
4503       state->m_statement= pfs;
4504       flags|= STATE_FLAG_EVENT;
4505 
4506       pfs_thread->m_events_statements_count++;
4507       pfs_thread->m_stmt_lock.dirty_to_allocated();
4508     }
4509   }
4510   else
4511   {
4512     if (klass->m_timed)
4513       flags= STATE_FLAG_TIMED;
4514     else
4515       flags= 0;
4516   }
4517 
4518   if (flag_statements_digest)
4519   {
4520     flags|= STATE_FLAG_DIGEST;
4521   }
4522 
4523   state->m_discarded= false;
4524   state->m_class= klass;
4525   state->m_flags= flags;
4526 
4527   state->m_lock_time= 0;
4528   state->m_rows_sent= 0;
4529   state->m_rows_examined= 0;
4530   state->m_created_tmp_disk_tables= 0;
4531   state->m_created_tmp_tables= 0;
4532   state->m_select_full_join= 0;
4533   state->m_select_full_range_join= 0;
4534   state->m_select_range= 0;
4535   state->m_select_range_check= 0;
4536   state->m_select_scan= 0;
4537   state->m_sort_merge_passes= 0;
4538   state->m_sort_range= 0;
4539   state->m_sort_rows= 0;
4540   state->m_sort_scan= 0;
4541   state->m_no_index_used= 0;
4542   state->m_no_good_index_used= 0;
4543 
4544   state->m_digest= NULL;
4545 
4546   state->m_schema_name_length= 0;
4547   state->m_cs_number= ((CHARSET_INFO *)charset)->number;
4548 
4549   return reinterpret_cast<PSI_statement_locker*> (state);
4550 }
4551 
4552 static PSI_statement_locker*
refine_statement_v1(PSI_statement_locker * locker,PSI_statement_key key)4553 refine_statement_v1(PSI_statement_locker *locker,
4554                     PSI_statement_key key)
4555 {
4556   PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
4557   if (state == NULL)
4558     return NULL;
4559   DBUG_ASSERT(state->m_class != NULL);
4560   PFS_statement_class *klass;
4561   /* Only refine statements for mutable instrumentation */
4562   klass= reinterpret_cast<PFS_statement_class*> (state->m_class);
4563   DBUG_ASSERT(klass->is_mutable());
4564   klass= find_statement_class(key);
4565 
4566   uint flags= state->m_flags;
4567 
4568   if (unlikely(klass == NULL) || !klass->m_enabled)
4569   {
4570     /* pop statement stack */
4571     if (flags & STATE_FLAG_THREAD)
4572     {
4573       PFS_thread *pfs_thread= reinterpret_cast<PFS_thread *> (state->m_thread);
4574       DBUG_ASSERT(pfs_thread != NULL);
4575       if (pfs_thread->m_events_statements_count > 0)
4576         pfs_thread->m_events_statements_count--;
4577     }
4578 
4579     state->m_discarded= true;
4580     return NULL;
4581   }
4582 
4583   if ((flags & STATE_FLAG_TIMED) && ! klass->m_timed)
4584     flags= flags & ~STATE_FLAG_TIMED;
4585 
4586   if (flags & STATE_FLAG_EVENT)
4587   {
4588     PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
4589     DBUG_ASSERT(pfs != NULL);
4590 
4591     /* mutate EVENTS_STATEMENTS_CURRENT.EVENT_NAME */
4592     pfs->m_class= klass;
4593   }
4594 
4595   state->m_class= klass;
4596   state->m_flags= flags;
4597   return reinterpret_cast<PSI_statement_locker*> (state);
4598 }
4599 
start_statement_v1(PSI_statement_locker * locker,const char * db,uint db_len,const char * src_file,uint src_line)4600 static void start_statement_v1(PSI_statement_locker *locker,
4601                                const char *db, uint db_len,
4602                                const char *src_file, uint src_line)
4603 {
4604   PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
4605   DBUG_ASSERT(state != NULL);
4606 
4607   uint flags= state->m_flags;
4608   ulonglong timer_start= 0;
4609 
4610   if (flags & STATE_FLAG_TIMED)
4611   {
4612     timer_start= get_timer_raw_value_and_function(statement_timer, & state->m_timer);
4613     state->m_timer_start= timer_start;
4614   }
4615 
4616   compile_time_assert(PSI_SCHEMA_NAME_LEN == NAME_LEN);
4617   DBUG_ASSERT(db_len <= sizeof(state->m_schema_name));
4618 
4619   if (db_len > 0)
4620     memcpy(state->m_schema_name, db, db_len);
4621   state->m_schema_name_length= db_len;
4622 
4623   if (flags & STATE_FLAG_EVENT)
4624   {
4625     PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
4626     DBUG_ASSERT(pfs != NULL);
4627 
4628     pfs->m_timer_start= timer_start;
4629     pfs->m_source_file= src_file;
4630     pfs->m_source_line= src_line;
4631 
4632     DBUG_ASSERT(db_len <= sizeof(pfs->m_current_schema_name));
4633     if (db_len > 0)
4634       memcpy(pfs->m_current_schema_name, db, db_len);
4635     pfs->m_current_schema_name_length= db_len;
4636   }
4637 }
4638 
set_statement_text_v1(PSI_statement_locker * locker,const char * text,uint text_len)4639 static void set_statement_text_v1(PSI_statement_locker *locker,
4640                                   const char *text, uint text_len)
4641 {
4642   PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
4643   DBUG_ASSERT(state != NULL);
4644 
4645   if (state->m_discarded)
4646     return;
4647 
4648   if (state->m_flags & STATE_FLAG_EVENT)
4649   {
4650     PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
4651     DBUG_ASSERT(pfs != NULL);
4652     if (text_len > sizeof (pfs->m_sqltext))
4653     {
4654       text_len= sizeof(pfs->m_sqltext);
4655       pfs->m_sqltext_truncated= true;
4656     }
4657     if (text_len)
4658       memcpy(pfs->m_sqltext, text, text_len);
4659     pfs->m_sqltext_length= text_len;
4660     pfs->m_sqltext_cs_number= state->m_cs_number;
4661   }
4662 
4663   return;
4664 }
4665 
/**
  Complete body of a void function that overwrites a statement attribute.

  Returns immediately when LOCKER is NULL or discarded. Otherwise assigns
  VALUE to member ATTR of the locker state and, when the locker owns an
  event record (STATE_FLAG_EVENT), to the same member of that record.
  VALUE is expanded twice. The expansion declares locals named 'state' and
  'pfs' and ends with a return statement.
*/
#define SET_STATEMENT_ATTR_BODY(LOCKER, ATTR, VALUE)                    \
  PSI_statement_locker_state *state=                                    \
    reinterpret_cast<PSI_statement_locker_state*> (LOCKER);             \
  if (unlikely(state == NULL) || state->m_discarded)                    \
    return;                                                             \
  state->ATTR= VALUE;                                                   \
  if (state->m_flags & STATE_FLAG_EVENT)                                \
  {                                                                     \
    PFS_events_statements *pfs=                                         \
      reinterpret_cast<PFS_events_statements*> (state->m_statement);    \
    DBUG_ASSERT(pfs != NULL);                                           \
    pfs->ATTR= VALUE;                                                   \
  }                                                                     \
  return;
4682 
/**
  Complete body of a void function that increments a statement attribute.

  Returns immediately when LOCKER is NULL or discarded. Otherwise adds
  VALUE to member ATTR of the locker state and, when the locker owns an
  event record (STATE_FLAG_EVENT), to the same member of that record.
  VALUE is expanded twice. The expansion declares locals named 'state' and
  'pfs' and ends with a return statement.
*/
#define INC_STATEMENT_ATTR_BODY(LOCKER, ATTR, VALUE)                    \
  PSI_statement_locker_state *state=                                    \
    reinterpret_cast<PSI_statement_locker_state*> (LOCKER);             \
  if (unlikely(state == NULL) || state->m_discarded)                    \
    return;                                                             \
  state->ATTR+= VALUE;                                                  \
  if (state->m_flags & STATE_FLAG_EVENT)                                \
  {                                                                     \
    PFS_events_statements *pfs=                                         \
      reinterpret_cast<PFS_events_statements*> (state->m_statement);    \
    DBUG_ASSERT(pfs != NULL);                                           \
    pfs->ATTR+= VALUE;                                                  \
  }                                                                     \
  return;
4699 
/**
  Set the lock time of a statement.
  Overwrites m_lock_time in the locker state and, when the locker owns an
  event record, in that record. No-op for a NULL or discarded locker.
  @param locker  statement locker
  @param count   new lock time value
*/
static void set_statement_lock_time_v1(PSI_statement_locker *locker,
                                       ulonglong count)
{
  SET_STATEMENT_ATTR_BODY(locker, m_lock_time, count);
}
4705 
/**
  Set the number of rows sent by a statement.
  Overwrites m_rows_sent in the locker state and, when the locker owns an
  event record, in that record. No-op for a NULL or discarded locker.
  @param locker  statement locker
  @param count   new rows-sent value
*/
static void set_statement_rows_sent_v1(PSI_statement_locker *locker,
                                       ulonglong count)
{
  SET_STATEMENT_ATTR_BODY(locker, m_rows_sent, count);
}
4711 
/**
  Set the number of rows examined by a statement.
  Overwrites m_rows_examined in the locker state and, when the locker owns
  an event record, in that record. No-op for a NULL or discarded locker.
  @param locker  statement locker
  @param count   new rows-examined value
*/
static void set_statement_rows_examined_v1(PSI_statement_locker *locker,
                                           ulonglong count)
{
  SET_STATEMENT_ATTR_BODY(locker, m_rows_examined, count);
}
4717 
/**
  Increment the created-temporary-disk-tables counter of a statement.
  Adds @p count to m_created_tmp_disk_tables in the locker state and, when
  the locker owns an event record, in that record. No-op for a NULL or
  discarded locker.
  @param locker  statement locker
  @param count   amount to add
*/
static void inc_statement_created_tmp_disk_tables_v1(PSI_statement_locker *locker,
                                                    ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_created_tmp_disk_tables, count);
}
4723 
/**
  Increment the created-temporary-tables counter of a statement.
  Adds @p count to m_created_tmp_tables in the locker state and, when the
  locker owns an event record, in that record. No-op for a NULL or
  discarded locker.
  @param locker  statement locker
  @param count   amount to add
*/
static void inc_statement_created_tmp_tables_v1(PSI_statement_locker *locker,
                                                ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_created_tmp_tables, count);
}
4729 
/**
  Increment the select-full-join counter of a statement.
  Adds @p count to m_select_full_join in the locker state and, when the
  locker owns an event record, in that record. No-op for a NULL or
  discarded locker.
  @param locker  statement locker
  @param count   amount to add
*/
static void inc_statement_select_full_join_v1(PSI_statement_locker *locker,
                                              ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_full_join, count);
}
4735 
/**
  Increment the select-full-range-join counter of a statement.
  Adds @p count to m_select_full_range_join in the locker state and, when
  the locker owns an event record, in that record. No-op for a NULL or
  discarded locker.
  @param locker  statement locker
  @param count   amount to add
*/
static void inc_statement_select_full_range_join_v1(PSI_statement_locker *locker,
                                                    ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_full_range_join, count);
}
4741 
/**
  Increment the select-range counter of a statement.
  Adds @p count to m_select_range in the locker state and, when the locker
  owns an event record, in that record. No-op for a NULL or discarded
  locker.
  @param locker  statement locker
  @param count   amount to add
*/
static void inc_statement_select_range_v1(PSI_statement_locker *locker,
                                          ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_range, count);
}
4747 
/**
  Increment the select-range-check counter of a statement.
  Adds @p count to m_select_range_check in the locker state and, when the
  locker owns an event record, in that record. No-op for a NULL or
  discarded locker.
  @param locker  statement locker
  @param count   amount to add
*/
static void inc_statement_select_range_check_v1(PSI_statement_locker *locker,
                                                ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_range_check, count);
}
4753 
/**
  Increment the select-scan counter of a statement.
  Adds @p count to m_select_scan in the locker state and, when the locker
  owns an event record, in that record. No-op for a NULL or discarded
  locker.
  @param locker  statement locker
  @param count   amount to add
*/
static void inc_statement_select_scan_v1(PSI_statement_locker *locker,
                                         ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_scan, count);
}
4759 
/**
  Increment the sort-merge-passes counter of a statement.
  Adds @p count to m_sort_merge_passes in the locker state and, when the
  locker owns an event record, in that record. No-op for a NULL or
  discarded locker.
  @param locker  statement locker
  @param count   amount to add
*/
static void inc_statement_sort_merge_passes_v1(PSI_statement_locker *locker,
                                               ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_merge_passes, count);
}
4765 
/**
  Increment the sort-range counter of a statement.
  Adds @p count to m_sort_range in the locker state and, when the locker
  owns an event record, in that record. No-op for a NULL or discarded
  locker.
  @param locker  statement locker
  @param count   amount to add
*/
static void inc_statement_sort_range_v1(PSI_statement_locker *locker,
                                        ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_range, count);
}
4771 
/**
  Increment the sort-rows counter of a statement.
  Adds @p count to m_sort_rows in the locker state and, when the locker
  owns an event record, in that record. No-op for a NULL or discarded
  locker.
  @param locker  statement locker
  @param count   amount to add
*/
static void inc_statement_sort_rows_v1(PSI_statement_locker *locker,
                                       ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_rows, count);
}
4777 
/**
  Increment the sort-scan counter of a statement.
  Adds @p count to m_sort_scan in the locker state and, when the locker
  owns an event record, in that record. No-op for a NULL or discarded
  locker.
  @param locker  statement locker
  @param count   amount to add
*/
static void inc_statement_sort_scan_v1(PSI_statement_locker *locker,
                                       ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_scan, count);
}
4783 
/**
  Flag a statement as having used no index.
  Sets m_no_index_used to 1 in the locker state and, when the locker owns
  an event record, in that record. No-op for a NULL or discarded locker.
  @param locker  statement locker
*/
static void set_statement_no_index_used_v1(PSI_statement_locker *locker)
{
  SET_STATEMENT_ATTR_BODY(locker, m_no_index_used, 1);
}
4788 
/**
  Flag a statement as having used no good index.
  Sets m_no_good_index_used to 1 in the locker state and, when the locker
  owns an event record, in that record. No-op for a NULL or discarded
  locker.
  @param locker  statement locker
*/
static void set_statement_no_good_index_used_v1(PSI_statement_locker *locker)
{
  SET_STATEMENT_ATTR_BODY(locker, m_no_good_index_used, 1);
}
4793 
end_statement_v1(PSI_statement_locker * locker,void * stmt_da)4794 static void end_statement_v1(PSI_statement_locker *locker, void *stmt_da)
4795 {
4796   PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
4797   Diagnostics_area *da= reinterpret_cast<Diagnostics_area*> (stmt_da);
4798   DBUG_ASSERT(state != NULL);
4799   DBUG_ASSERT(da != NULL);
4800 
4801   if (state->m_discarded)
4802     return;
4803 
4804   PFS_statement_class *klass= reinterpret_cast<PFS_statement_class *> (state->m_class);
4805   DBUG_ASSERT(klass != NULL);
4806 
4807   ulonglong timer_end= 0;
4808   ulonglong wait_time= 0;
4809   uint flags= state->m_flags;
4810 
4811   if (flags & STATE_FLAG_TIMED)
4812   {
4813     timer_end= state->m_timer();
4814     wait_time= timer_end - state->m_timer_start;
4815   }
4816 
4817   PFS_statement_stat *event_name_array;
4818   uint index= klass->m_event_name_index;
4819   PFS_statement_stat *stat;
4820 
4821   /*
4822    Capture statement stats by digest.
4823   */
4824   const sql_digest_storage *digest_storage= NULL;
4825   PFS_statement_stat *digest_stat= NULL;
4826 
4827   if (flags & STATE_FLAG_THREAD)
4828   {
4829     PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
4830     DBUG_ASSERT(thread != NULL);
4831     event_name_array= thread->m_instr_class_statements_stats;
4832     /* Aggregate to EVENTS_STATEMENTS_SUMMARY_BY_THREAD_BY_EVENT_NAME */
4833     stat= & event_name_array[index];
4834 
4835     if (flags & STATE_FLAG_DIGEST)
4836     {
4837       digest_storage= state->m_digest;
4838 
4839       if (digest_storage != NULL)
4840       {
4841         /* Populate PFS_statements_digest_stat with computed digest information.*/
4842         digest_stat= find_or_create_digest(thread, digest_storage,
4843                                            state->m_schema_name,
4844                                            state->m_schema_name_length);
4845       }
4846     }
4847 
4848     if (flags & STATE_FLAG_EVENT)
4849     {
4850       PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
4851       DBUG_ASSERT(pfs != NULL);
4852 
4853       thread->m_stmt_lock.allocated_to_dirty();
4854 
4855       switch(da->status())
4856       {
4857         case Diagnostics_area::DA_OK_BULK:
4858         case Diagnostics_area::DA_EMPTY:
4859           break;
4860         case Diagnostics_area::DA_OK:
4861           memcpy(pfs->m_message_text, da->message(), MYSQL_ERRMSG_SIZE);
4862           pfs->m_message_text[MYSQL_ERRMSG_SIZE]= 0;
4863           pfs->m_rows_affected= da->affected_rows();
4864           pfs->m_warning_count= da->statement_warn_count();
4865           memcpy(pfs->m_sqlstate, "00000", SQLSTATE_LENGTH);
4866           break;
4867         case Diagnostics_area::DA_EOF:
4868           pfs->m_warning_count= da->statement_warn_count();
4869           break;
4870         case Diagnostics_area::DA_ERROR:
4871           memcpy(pfs->m_message_text, da->message(), MYSQL_ERRMSG_SIZE);
4872           pfs->m_message_text[MYSQL_ERRMSG_SIZE]= 0;
4873           pfs->m_sql_errno= da->sql_errno();
4874           pfs->m_error_count++;
4875           memcpy(pfs->m_sqlstate, da->get_sqlstate(), SQLSTATE_LENGTH);
4876           break;
4877         case Diagnostics_area::DA_DISABLED:
4878           break;
4879       }
4880 
4881       pfs->m_timer_end= timer_end;
4882       pfs->m_end_event_id= thread->m_event_id;
4883 
4884       if (digest_storage != NULL)
4885       {
4886         /*
4887           The following columns in events_statement_current:
4888           - DIGEST,
4889           - DIGEST_TEXT
4890           are computed from the digest storage.
4891         */
4892         pfs->m_digest_storage.copy(digest_storage);
4893       }
4894 
4895       if (flag_events_statements_history)
4896         insert_events_statements_history(thread, pfs);
4897       if (flag_events_statements_history_long)
4898         insert_events_statements_history_long(pfs);
4899 
4900       DBUG_ASSERT(thread->m_events_statements_count > 0);
4901       thread->m_events_statements_count--;
4902       thread->m_stmt_lock.dirty_to_allocated();
4903     }
4904   }
4905   else
4906   {
4907     if (flags & STATE_FLAG_DIGEST)
4908     {
4909       PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4910 
4911       /* An instrumented thread is required, for LF_PINS. */
4912       if (thread != NULL)
4913       {
4914         /* Set digest stat. */
4915         digest_storage= state->m_digest;
4916 
4917         if (digest_storage != NULL)
4918         {
4919           /* Populate statements_digest_stat with computed digest information. */
4920           digest_stat= find_or_create_digest(thread, digest_storage,
4921                                              state->m_schema_name,
4922                                              state->m_schema_name_length);
4923         }
4924       }
4925     }
4926 
4927     event_name_array= global_instr_class_statements_array;
4928     /* Aggregate to EVENTS_STATEMENTS_SUMMARY_GLOBAL_BY_EVENT_NAME */
4929     stat= & event_name_array[index];
4930   }
4931 
4932   if (flags & STATE_FLAG_TIMED)
4933   {
4934     /* Aggregate to EVENTS_STATEMENTS_SUMMARY_..._BY_EVENT_NAME (timed) */
4935     stat->aggregate_value(wait_time);
4936   }
4937   else
4938   {
4939     /* Aggregate to EVENTS_STATEMENTS_SUMMARY_..._BY_EVENT_NAME (counted) */
4940     stat->aggregate_counted();
4941   }
4942 
4943   stat->m_lock_time+= state->m_lock_time;
4944   stat->m_rows_sent+= state->m_rows_sent;
4945   stat->m_rows_examined+= state->m_rows_examined;
4946   stat->m_created_tmp_disk_tables+= state->m_created_tmp_disk_tables;
4947   stat->m_created_tmp_tables+= state->m_created_tmp_tables;
4948   stat->m_select_full_join+= state->m_select_full_join;
4949   stat->m_select_full_range_join+= state->m_select_full_range_join;
4950   stat->m_select_range+= state->m_select_range;
4951   stat->m_select_range_check+= state->m_select_range_check;
4952   stat->m_select_scan+= state->m_select_scan;
4953   stat->m_sort_merge_passes+= state->m_sort_merge_passes;
4954   stat->m_sort_range+= state->m_sort_range;
4955   stat->m_sort_rows+= state->m_sort_rows;
4956   stat->m_sort_scan+= state->m_sort_scan;
4957   stat->m_no_index_used+= state->m_no_index_used;
4958   stat->m_no_good_index_used+= state->m_no_good_index_used;
4959 
4960   if (digest_stat != NULL)
4961   {
4962     if (flags & STATE_FLAG_TIMED)
4963     {
4964       digest_stat->aggregate_value(wait_time);
4965     }
4966     else
4967     {
4968       digest_stat->aggregate_counted();
4969     }
4970 
4971     digest_stat->m_lock_time+= state->m_lock_time;
4972     digest_stat->m_rows_sent+= state->m_rows_sent;
4973     digest_stat->m_rows_examined+= state->m_rows_examined;
4974     digest_stat->m_created_tmp_disk_tables+= state->m_created_tmp_disk_tables;
4975     digest_stat->m_created_tmp_tables+= state->m_created_tmp_tables;
4976     digest_stat->m_select_full_join+= state->m_select_full_join;
4977     digest_stat->m_select_full_range_join+= state->m_select_full_range_join;
4978     digest_stat->m_select_range+= state->m_select_range;
4979     digest_stat->m_select_range_check+= state->m_select_range_check;
4980     digest_stat->m_select_scan+= state->m_select_scan;
4981     digest_stat->m_sort_merge_passes+= state->m_sort_merge_passes;
4982     digest_stat->m_sort_range+= state->m_sort_range;
4983     digest_stat->m_sort_rows+= state->m_sort_rows;
4984     digest_stat->m_sort_scan+= state->m_sort_scan;
4985     digest_stat->m_no_index_used+= state->m_no_index_used;
4986     digest_stat->m_no_good_index_used+= state->m_no_good_index_used;
4987   }
4988 
4989   switch (da->status())
4990   {
4991     case Diagnostics_area::DA_OK_BULK:
4992     case Diagnostics_area::DA_EMPTY:
4993       break;
4994     case Diagnostics_area::DA_OK:
4995       stat->m_rows_affected+= da->affected_rows();
4996       stat->m_warning_count+= da->statement_warn_count();
4997       if (digest_stat != NULL)
4998       {
4999         digest_stat->m_rows_affected+= da->affected_rows();
5000         digest_stat->m_warning_count+= da->statement_warn_count();
5001       }
5002       break;
5003     case Diagnostics_area::DA_EOF:
5004       stat->m_warning_count+= da->statement_warn_count();
5005       if (digest_stat != NULL)
5006       {
5007         digest_stat->m_warning_count+= da->statement_warn_count();
5008       }
5009       break;
5010     case Diagnostics_area::DA_ERROR:
5011       stat->m_error_count++;
5012       if (digest_stat != NULL)
5013       {
5014         digest_stat->m_error_count++;
5015       }
5016       break;
5017     case Diagnostics_area::DA_DISABLED:
5018       break;
5019   }
5020 }
5021 
5022 /**
5023   Implementation of the socket instrumentation interface.
5024   @sa PSI_v1::end_socket_wait.
5025 */
end_socket_wait_v1(PSI_socket_locker * locker,size_t byte_count)5026 static void end_socket_wait_v1(PSI_socket_locker *locker, size_t byte_count)
5027 {
5028   PSI_socket_locker_state *state= reinterpret_cast<PSI_socket_locker_state*> (locker);
5029   DBUG_ASSERT(state != NULL);
5030 
5031   PFS_socket *socket= reinterpret_cast<PFS_socket *>(state->m_socket);
5032   DBUG_ASSERT(socket != NULL);
5033 
5034   ulonglong timer_end= 0;
5035   ulonglong wait_time= 0;
5036   PFS_byte_stat *byte_stat;
5037   uint flags= state->m_flags;
5038   size_t bytes= ((int)byte_count > -1 ? byte_count : 0);
5039 
5040   switch (state->m_operation)
5041   {
5042     /* Group read operations */
5043     case PSI_SOCKET_RECV:
5044     case PSI_SOCKET_RECVFROM:
5045     case PSI_SOCKET_RECVMSG:
5046       byte_stat= &socket->m_socket_stat.m_io_stat.m_read;
5047       break;
5048     /* Group write operations */
5049     case PSI_SOCKET_SEND:
5050     case PSI_SOCKET_SENDTO:
5051     case PSI_SOCKET_SENDMSG:
5052       byte_stat= &socket->m_socket_stat.m_io_stat.m_write;
5053       break;
5054     /* Group remaining operations as miscellaneous */
5055     case PSI_SOCKET_CONNECT:
5056     case PSI_SOCKET_CREATE:
5057     case PSI_SOCKET_BIND:
5058     case PSI_SOCKET_SEEK:
5059     case PSI_SOCKET_OPT:
5060     case PSI_SOCKET_STAT:
5061     case PSI_SOCKET_SHUTDOWN:
5062     case PSI_SOCKET_SELECT:
5063     case PSI_SOCKET_CLOSE:
5064       byte_stat= &socket->m_socket_stat.m_io_stat.m_misc;
5065       break;
5066     default:
5067       DBUG_ASSERT(false);
5068       byte_stat= NULL;
5069       break;
5070   }
5071 
5072   /* Aggregation for EVENTS_WAITS_SUMMARY_BY_INSTANCE */
5073   if (flags & STATE_FLAG_TIMED)
5074   {
5075     timer_end= state->m_timer();
5076     wait_time= timer_end - state->m_timer_start;
5077 
5078     /* Aggregate to the socket instrument for now (timed) */
5079     byte_stat->aggregate(wait_time, bytes);
5080   }
5081   else
5082   {
5083     /* Aggregate to the socket instrument (event count and byte count) */
5084     byte_stat->aggregate_counted(bytes);
5085   }
5086 
5087   /* Aggregate to EVENTS_WAITS_HISTORY and EVENTS_WAITS_HISTORY_LONG */
5088   if (flags & STATE_FLAG_EVENT)
5089   {
5090     PFS_thread *thread= reinterpret_cast<PFS_thread *>(state->m_thread);
5091     DBUG_ASSERT(thread != NULL);
5092     PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
5093     DBUG_ASSERT(wait != NULL);
5094 
5095     wait->m_timer_end= timer_end;
5096     wait->m_end_event_id= thread->m_event_id;
5097     wait->m_number_of_bytes= bytes;
5098 
5099     if (flag_events_waits_history)
5100       insert_events_waits_history(thread, wait);
5101     if (flag_events_waits_history_long)
5102       insert_events_waits_history_long(wait);
5103     thread->m_events_waits_current--;
5104 
5105     DBUG_ASSERT(wait == thread->m_events_waits_current);
5106   }
5107 }
5108 
set_socket_state_v1(PSI_socket * socket,PSI_socket_state state)5109 static void set_socket_state_v1(PSI_socket *socket, PSI_socket_state state)
5110 {
5111   DBUG_ASSERT((state == PSI_SOCKET_STATE_IDLE) || (state == PSI_SOCKET_STATE_ACTIVE));
5112   PFS_socket *pfs= reinterpret_cast<PFS_socket*>(socket);
5113   DBUG_ASSERT(pfs != NULL);
5114   DBUG_ASSERT(pfs->m_idle || (state == PSI_SOCKET_STATE_IDLE));
5115   DBUG_ASSERT(!pfs->m_idle || (state == PSI_SOCKET_STATE_ACTIVE));
5116   pfs->m_idle= (state == PSI_SOCKET_STATE_IDLE);
5117 }
5118 
5119 /**
5120   Set socket descriptor and address info.
5121 */
set_socket_info_v1(PSI_socket * socket,const my_socket * fd,const struct sockaddr * addr,socklen_t addr_len)5122 static void set_socket_info_v1(PSI_socket *socket,
5123                                const my_socket *fd,
5124                                const struct sockaddr *addr,
5125                                socklen_t addr_len)
5126 {
5127   PFS_socket *pfs= reinterpret_cast<PFS_socket*>(socket);
5128   DBUG_ASSERT(pfs != NULL);
5129 
5130   /** Set socket descriptor */
5131   if (fd != NULL)
5132     pfs->m_fd= (uint)*fd;
5133 
5134   /** Set raw socket address and length */
5135   if (likely(addr != NULL && addr_len > 0))
5136   {
5137     pfs->m_addr_len= addr_len;
5138 
5139     /** Restrict address length to size of struct */
5140     if (unlikely(pfs->m_addr_len > sizeof(sockaddr_storage)))
5141       pfs->m_addr_len= sizeof(struct sockaddr_storage);
5142 
5143     memcpy(&pfs->m_sock_addr, addr, pfs->m_addr_len);
5144   }
5145 }
5146 
5147 /**
5148   Implementation of the socket instrumentation interface.
5149   @sa PSI_v1::set_socket_info.
5150 */
set_socket_thread_owner_v1(PSI_socket * socket)5151 static void set_socket_thread_owner_v1(PSI_socket *socket)
5152 {
5153   PFS_socket *pfs_socket= reinterpret_cast<PFS_socket*>(socket);
5154   DBUG_ASSERT(pfs_socket != NULL);
5155   pfs_socket->m_thread_owner= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
5156 }
5157 
5158 struct PSI_digest_locker*
pfs_digest_start_v1(PSI_statement_locker * locker)5159 pfs_digest_start_v1(PSI_statement_locker *locker)
5160 {
5161   PSI_statement_locker_state *statement_state;
5162   statement_state= reinterpret_cast<PSI_statement_locker_state*> (locker);
5163   DBUG_ASSERT(statement_state != NULL);
5164 
5165   if (statement_state->m_discarded)
5166     return NULL;
5167 
5168   if (statement_state->m_flags & STATE_FLAG_DIGEST)
5169   {
5170     return reinterpret_cast<PSI_digest_locker*> (locker);
5171   }
5172 
5173   return NULL;
5174 }
5175 
pfs_digest_end_v1(PSI_digest_locker * locker,const sql_digest_storage * digest)5176 void pfs_digest_end_v1(PSI_digest_locker *locker, const sql_digest_storage *digest)
5177 {
5178   PSI_statement_locker_state *statement_state;
5179   statement_state= reinterpret_cast<PSI_statement_locker_state*> (locker);
5180   DBUG_ASSERT(statement_state != NULL);
5181   DBUG_ASSERT(digest != NULL);
5182 
5183   if (statement_state->m_discarded)
5184     return;
5185 
5186   if (statement_state->m_flags & STATE_FLAG_DIGEST)
5187   {
5188     statement_state->m_digest= digest;
5189   }
5190 }
5191 
5192 /**
5193   Implementation of the thread attribute connection interface
5194   @sa PSI_v1::set_thread_connect_attr.
5195 */
set_thread_connect_attrs_v1(const char * buffer,uint length,const void * from_cs)5196 static int set_thread_connect_attrs_v1(const char *buffer, uint length,
5197                                        const void *from_cs)
5198 {
5199 
5200   PFS_thread *thd= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
5201 
5202   DBUG_ASSERT(buffer != NULL);
5203 
5204   if (likely(thd != NULL) && session_connect_attrs_size_per_thread > 0)
5205   {
5206     const CHARSET_INFO *cs = static_cast<const CHARSET_INFO *> (from_cs);
5207 
5208     /* copy from the input buffer as much as we can fit */
5209     uint copy_size= (uint)(length < session_connect_attrs_size_per_thread ?
5210                            length : session_connect_attrs_size_per_thread);
5211     thd->m_session_lock.allocated_to_dirty();
5212     memcpy(thd->m_session_connect_attrs, buffer, copy_size);
5213     thd->m_session_connect_attrs_length= copy_size;
5214     thd->m_session_connect_attrs_cs_number= cs->number;
5215     thd->m_session_lock.dirty_to_allocated();
5216 
5217     if (copy_size == length)
5218       return 0;
5219 
5220     session_connect_attrs_lost++;
5221     return 1;
5222   }
5223   return 0;
5224 }
5225 
5226 
5227 /**
5228   Implementation of the instrumentation interface.
5229   @sa PSI_v1.
5230 */
5231 PSI_v1 PFS_v1=
5232 {
5233   register_mutex_v1,
5234   register_rwlock_v1,
5235   register_cond_v1,
5236   register_thread_v1,
5237   register_file_v1,
5238   register_stage_v1,
5239   register_statement_v1,
5240   register_socket_v1,
5241   init_mutex_v1,
5242   destroy_mutex_v1,
5243   init_rwlock_v1,
5244   destroy_rwlock_v1,
5245   init_cond_v1,
5246   destroy_cond_v1,
5247   init_socket_v1,
5248   destroy_socket_v1,
5249   get_table_share_v1,
5250   release_table_share_v1,
5251   drop_table_share_v1,
5252   open_table_v1,
5253   unbind_table_v1,
5254   rebind_table_v1,
5255   close_table_v1,
5256   create_file_v1,
5257   spawn_thread_v1,
5258   new_thread_v1,
5259   set_thread_id_v1,
5260   get_thread_v1,
5261   set_thread_user_v1,
5262   set_thread_account_v1,
5263   set_thread_db_v1,
5264   set_thread_command_v1,
5265   set_thread_start_time_v1,
5266   set_thread_state_v1,
5267   set_thread_info_v1,
5268   set_thread_v1,
5269   delete_current_thread_v1,
5270   delete_thread_v1,
5271   get_thread_file_name_locker_v1,
5272   get_thread_file_stream_locker_v1,
5273   get_thread_file_descriptor_locker_v1,
5274   unlock_mutex_v1,
5275   unlock_rwlock_v1,
5276   signal_cond_v1,
5277   broadcast_cond_v1,
5278   start_idle_wait_v1,
5279   end_idle_wait_v1,
5280   start_mutex_wait_v1,
5281   end_mutex_wait_v1,
5282   start_rwlock_wait_v1, /* read */
5283   end_rwlock_rdwait_v1,
5284   start_rwlock_wait_v1, /* write */
5285   end_rwlock_wrwait_v1,
5286   start_cond_wait_v1,
5287   end_cond_wait_v1,
5288   start_table_io_wait_v1,
5289   end_table_io_wait_v1,
5290   start_table_lock_wait_v1,
5291   end_table_lock_wait_v1,
5292   start_file_open_wait_v1,
5293   end_file_open_wait_v1,
5294   end_file_open_wait_and_bind_to_descriptor_v1,
5295   start_file_wait_v1,
5296   end_file_wait_v1,
5297   start_file_close_wait_v1,
5298   end_file_close_wait_v1,
5299   start_stage_v1,
5300   end_stage_v1,
5301   get_thread_statement_locker_v1,
5302   refine_statement_v1,
5303   start_statement_v1,
5304   set_statement_text_v1,
5305   set_statement_lock_time_v1,
5306   set_statement_rows_sent_v1,
5307   set_statement_rows_examined_v1,
5308   inc_statement_created_tmp_disk_tables_v1,
5309   inc_statement_created_tmp_tables_v1,
5310   inc_statement_select_full_join_v1,
5311   inc_statement_select_full_range_join_v1,
5312   inc_statement_select_range_v1,
5313   inc_statement_select_range_check_v1,
5314   inc_statement_select_scan_v1,
5315   inc_statement_sort_merge_passes_v1,
5316   inc_statement_sort_range_v1,
5317   inc_statement_sort_rows_v1,
5318   inc_statement_sort_scan_v1,
5319   set_statement_no_index_used_v1,
5320   set_statement_no_good_index_used_v1,
5321   end_statement_v1,
5322   start_socket_wait_v1,
5323   end_socket_wait_v1,
5324   set_socket_state_v1,
5325   set_socket_info_v1,
5326   set_socket_thread_owner_v1,
5327   pfs_digest_start_v1,
5328   pfs_digest_end_v1,
5329   set_thread_connect_attrs_v1,
5330 };
5331 
get_interface(int version)5332 static void* get_interface(int version)
5333 {
5334   switch (version)
5335   {
5336   case PSI_VERSION_1:
5337     return &PFS_v1;
5338   default:
5339     return NULL;
5340   }
5341 }
5342 
5343 C_MODE_END
5344 
5345 struct PSI_bootstrap PFS_bootstrap=
5346 {
5347   get_interface
5348 };
5349